Hello everyone, I ran into the following error while using the Trainer class from transformers to train my model:
File "/home/hpc/xyp/tinyllama/sft/lora_peft.py", line 91, in <module>
trainer.train()
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 1938, in train
return inner_training_loop(
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 2279, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 3318, in training_step
loss = self.compute_loss(model, inputs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 3363, in compute_loss
outputs = model(**inputs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/peft/peft_model.py", line 762, in forward
return self.get_base_model()(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 1189, in forward
outputs = self.model(
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 971, in forward
causal_mask = self._update_causal_mask(
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 1086, in _update_causal_mask
causal_mask = _prepare_4d_causal_attention_mask_with_cache_position(
File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 104, in _prepare_4d_causal_attention_mask_with_cache_position
causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
RuntimeError: expand(torch.cuda.FloatTensor{[8, 8, 1, 1, 512]}, size=[8, 1, 1, 512]): the number of sizes provided (4) must be greater or equal to the number of dimensions in the tensor (5)
0%| | 0/42 [00:00<?, ?it/s]
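If I read the last frame correctly, the mask that masked_fill receives has one dimension too many (5-D instead of 4-D). The snippet below is only my attempt to reproduce the shape arithmetic with dummy tensors; the [8, 1, 512] attention_mask shape is my guess, while the [8, 1, 1, 512] causal mask shape and the [8, 8, 1, 1, 512] result come from the error message:

import torch

# Dummy tensors with the shapes I suspect are involved (assumptions, not printed from the actual run).
causal_mask = torch.zeros(8, 1, 1, 512)   # 4-D mask: [batch, 1, query_len, kv_len]
attention_mask = torch.ones(8, 1, 512)    # 3-D instead of the usual 2-D [batch, seq_len]

# Broadcasting the 3-D mask indexed with [:, None, None, :] against the 4-D causal mask
# yields a 5-D tensor, which matches the [8, 8, 1, 1, 512] reported in the RuntimeError.
combined = causal_mask[:, :, :, :512] + attention_mask[:, None, None, :]
print(combined.shape)  # torch.Size([8, 8, 1, 1, 512])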
I don't know whether this is a problem with the transformers package or with my data, and I hope someone can help me solve it. My source code is as follows:
import peft
from datasets import load_dataset
import json
import torch
from peft import LoraConfig
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from transformers import Trainer
from transformers import DataCollatorWithPadding
class MyDataset(Dataset):
    def __init__(self, data_file, tokenizer, max_length=512):
        self.data = []
        self.tokenizer = tokenizer
        self.maxlen = max_length
        for i in data_file:
            with open(i, 'r') as f:
                json_data = json.load(f)
            for item in json_data:
                instruction = item["instruction"]
                input = item["input"]
                output = item["output"]
                combined_text = f"Instruction: {instruction}\nInput: {input}\nOutput: {output}"
                tokenizer_data = tokenizer(
                    combined_text,
                    truncation=True,
                    padding="max_length",
                    max_length=self.maxlen,
                    return_tensors='pt'
                )
                self.data.append(tokenizer_data)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        return self.data[item]
if __name__ == '__main__':
    lora_config = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.1,
        bias="none",
    )
    train_data_file = ["../data/my_sw_ft_dataset.json", "../data/my_code_ft_dataset.json"]
    test_data_file = ["../data/my_all_eval_dataset.json"]
    checkpoint = "/home/hpc/xyp/tinyllama"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    model = peft.get_peft_model(model, lora_config)
    training_args = TrainingArguments(
        output_dir="./results",            # output directory
        # gradient_accumulation_steps=16,  # gradient accumulation steps
        # learning_rate=1e-4,              # learning rate
        # num_train_epochs=3,              # number of training epochs
        # logging_dir="./logs",            # logging directory
        # logging_steps=10,
    )
    train_dataset = MyDataset(train_data_file, tokenizer)
    test_dataset = MyDataset(test_data_file, tokenizer)
    print(train_dataset[0]["input_ids"].shape)
    print(test_dataset[0]["attention_mask"].shape)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )
    trainer.train()
    model.save_pretrained("./lora_llama")
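In case it is relevant: MyDataset tokenizes every sample with return_tensors='pt', so each item already carries a batch dimension of 1, and I suspect the collated attention_mask ends up as [8, 1, 512] instead of [8, 512] (the default per_device_train_batch_size is 8). Below is the diagnostic I am thinking of adding right after data_collator is created in the script above, plus an untested squeeze(0) workaround; both are sketches, not verified fixes:

    # Diagnostic only (assumes the dataset has at least 8 samples): print what the
    # collator actually hands to the model.
    batch = data_collator([train_dataset[i] for i in range(8)])
    for name, tensor in batch.items():
        print(name, tuple(tensor.shape))  # I expect (8, 1, 512) for input_ids / attention_mask

    # Untested workaround: return plain 1-D tensors per sample so batching gives [8, 512].
    # This would replace MyDataset.__getitem__:
    #     def __getitem__(self, item):
    #         encoded = self.data[item]
    #         return {key: value.squeeze(0) for key, value in encoded.items()}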