Hi fellas, I am trying to create a chatbot from mpt-7b. Here is the code:
import torch
import transformers
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
model_name = './mpt-7b'
config = AutoConfig.from_pretrained(
    model_name,
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
model.eval() # Evaluation mode is default, but calling it anyway
system_prompt = '''<|im_start|> system
You are an AI assistant
<|im_end|>\n
'''
user_message = '''
what is the meaning of life?
'''
fmt_user_message = f'<|im_start|>user {user_message}<|im_end|>\n'
input_ids = tokenizer(system_prompt + fmt_user_message, return_tensors="pt").input_ids
input_ids = input_ids.to(model.device)
generate_params = {
    "max_new_tokens": 512,
    "temperature": 1.0,
    "top_p": 1.0,
    "top_k": 50,
    "use_cache": True,
    "do_sample": True,
    "eos_token_id": 0,  # <|endoftext|> is token id 0 in the gpt-neox tokenizer MPT uses
    "pad_token_id": 0
}
generated_ids = model.generate(input_ids, **generate_params)
output = tokenizer.decode(generated_ids.cpu().tolist()[0], skip_special_tokens=True)
for line in output.split('\n'):
    print(line)
Does this make sense, or should I be using mpt-7b-chat?
I am getting the error:
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_SUPPORTED when calling
cublasGemmStridedBatchedExFix(handle, opa, opb, (int)m, (int)n, (int)k, (void*)&falpha, a, CUDA_R_16BF, (int)lda, stridea, b, CUDA_R_16BF, (int)ldb, strideb, (void*)&fbeta, c, CUDA_R_16BF, (int)ldc, stridec, (int)num_batches, CUDA_R_32F, CUBLAS_GEMM_DEFAULT_TENSOR_OP)
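For what it's worth, here is a quick check I was going to run to see whether my GPU actually supports bfloat16 GEMMs (just a sketch, assuming the error comes from the card rather than the model):

import torch

# Name and compute capability of the current GPU; bf16 GEMMs generally want Ampere (8.x) or newer
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_capability(0))
# PyTorch's own check for bfloat16 support on the current device
print(torch.cuda.is_bf16_supported())

If bf16 isn't supported, I assume I could fall back to torch_dtype=torch.float16, but I'm not sure that's the right fix here.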
Is this related to the choice of model?
Thanks in advance