i’m trying to fine-tune a mistral 7B model locally for a regression task, the code works and the loss is decreasing but the outputs when i run trainer.predict(test_data) are cut in the middle, so i assumed its about the max_length parameter in the tokenizer.
after i increase the length from 1024 to 2048, this error occurs.
here is my code:
import torch
import transformers
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
from sklearn.model_selection import KFold
import os
import numpy as np
from dataset_utils import load_questionnaire
CUTOFF_LEN = 2048
LORA_R = 8
LORA_ALPHA = 2 * LORA_R
LORA_DROPOUT = 0.1
def tokenize(prompt):
return tokenizer(
prompt + tokenizer.eos_token,
truncation=True,
max_length=CUTOFF_LEN ,
padding="max_length"
)
def generate_prompt(user_query,is_instruct):
sys_msg = ("answer the following question:.\n"
"Provide your answer in the following JSON format: {\"score\": \"predicted_score\"}\n")
if is_instruct:
p = "<s> [INST]" + sys_msg + "\n" + user_query["text"] + "[/INST]" + "{\"score\":" + str(user_query["score"]) + "}</s>"
else:
p = "<s>" + sys_msg + "\n" + user_query["text"] + "{\"score\":" + str(user_query["score"]) + "}</s>"
return p
if __name__ == '__main__':
#model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
model_name = "mistralai/Mistral-7B-v0.1"
questionnaire = load_questionnaire('data/my_data.csv')
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)
total_test_results = []
for i, (train_index, test_index) in enumerate(kf.split(questionnaire)):
print(f"Fold {i}")
train_data = questionnaire.iloc[train_index]
test_data = questionnaire.iloc[test_index]
print('debug')
train_data = train_data[:15]
test_data = test_data[:15]
train_data = Dataset.from_pandas(train_data)
test_data = Dataset.from_pandas(test_data)
bnb_config = BitsAndBytesConfig(
load_in_4bit= True,
bnb_4bit_quant_type= "nf4",
bnb_4bit_compute_dtype= torch.bfloat16,
bnb_4bit_use_double_quant= False,
)
tokenizer = AutoTokenizer.from_pretrained(model_name,
padding_side="left",
add_eos_token=True,
add_bos_token=True)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name,
quantization_config=bnb_config,
torch_dtype=torch.float16,
device_map="auto")
# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
r=16,
lora_alpha=16,
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM",
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"]
)
model = get_peft_model(model, config)
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x,is_instruct='instruct' in model_name.lower())), remove_columns=["text" , "score"])
test_data = test_data.map(lambda x: tokenize(generate_prompt(x,is_instruct='instruct' in model_name.lower())), remove_columns=["text", "score"])
trainer = Trainer(
model=model,
train_dataset=train_data,
args=TrainingArguments(
per_device_train_batch_size=1,
gradient_accumulation_steps=4,
num_train_epochs=6,
learning_rate=1e-4,
logging_steps=2,
optim="adamw_torch",
save_strategy="epoch",
output_dir = f"mistral 7B lora-instruct-Qs (fold-{i})"
),
data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
predictions = trainer.predict(test_data)
outputs = tokenizer.batch_decode(predictions[1])
test_results = trainer.evaluate(test_data)
print(f'test results for fold {i}:{test_results}')
total_test_results.append(test_results)
np.save('mixtral-moe-lora-instruct-Qs.npy', np.array(total_test_results))
the full stack trace:
Traceback (most recent call last):
File "/specific/a/home/cc/students/csguests/gros/.pycharm_helpers/pydev/_pydevd_bundle/pydevd_exec2.py", line 3, in Exec
exec(exp, global_vars, local_vars)
File "<input>", line 1, in <module>
AttributeError: 'LlamaTokenizerFast' object has no attribute 'special_tokens'
No chat template is defined for this tokenizer - using the default template for the LlamaTokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.
Traceback (most recent call last):
File "/specific/a/home/cc/students/csguests/gros/.pycharm_helpers/pydev/pydevd.py", line 1534, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "/specific/a/home/cc/students/csguests/gros/.pycharm_helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/home/ai_center/ai_users/gros/firstPred/finetune_mixtral_7Bx8.py", line 143, in <module>
outputs = tokenizer.batch_decode(predictions[1])
File "/home/ai_center/ai_users/gros/miniconda3/envs/psyq/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 3785, in batch_decode
return [
File "/home/ai_center/ai_users/gros/miniconda3/envs/psyq/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 3786, in <listcomp>
self.decode(
File "/home/ai_center/ai_users/gros/miniconda3/envs/psyq/lib/python3.9/site-packages/transformers/tokenization_utils_base.py", line 3825, in decode
return self._decode(
File "/home/ai_center/ai_users/gros/miniconda3/envs/psyq/lib/python3.9/site-packages/transformers/tokenization_utils_fast.py", line 625, in _decode
text = self._tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
OverflowError: out of range integral type conversion attempted
Process finished with exit code 1