Using Trainer to fine-tune the model gives an error. Seeking a solution!

Hello everyone, I encountered the following error when using the Trainer class from transformers to train my model.

  File "/home/hpc/xyp/tinyllama/sft/lora_peft.py", line 91, in <module>
    trainer.train()
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 1938, in train
    return inner_training_loop(
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 2279, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 3318, in training_step
    loss = self.compute_loss(model, inputs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/trainer.py", line 3363, in compute_loss
    outputs = model(**inputs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/peft/peft_model.py", line 762, in forward
    return self.get_base_model()(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 1189, in forward
    outputs = self.model(
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 971, in forward
    causal_mask = self._update_causal_mask(
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 1086, in _update_causal_mask
    causal_mask = _prepare_4d_causal_attention_mask_with_cache_position(
  File "/home/hpc/anaconda3/envs/llm/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 104, in _prepare_4d_causal_attention_mask_with_cache_position
    causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
RuntimeError: expand(torch.cuda.FloatTensor{[8, 8, 1, 1, 512]}, size=[8, 1, 1, 512]): the number of sizes provided (4) must be greater or equal to the number of dimensions in the tensor (5)
  0%|          | 0/42 [00:00<?, ?it/s]
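
If I read the last frame correctly, the failure happens when a 5-D tensor gets assigned into a 4-D slice of the causal mask. Here is a minimal sketch that seems to reproduce the same kind of expand() error with dummy tensors (batch size 8 and sequence length 512 are taken from the message, everything else is made up by me):

    import torch

    causal_mask = torch.zeros(8, 1, 1, 512)      # 4-D slice the model expects
    padding_mask = torch.zeros(8, 8, 1, 1, 512)  # 5-D shape reported in the error
    # Assigning the 5-D tensor into the 4-D slice raises:
    # RuntimeError: expand(torch.FloatTensor{[8, 8, 1, 1, 512]}, size=[8, 1, 1, 512]): ...
    causal_mask[:, :, :, :512] = padding_mask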

I don’t know whether it’s a problem with the transformers package or with my data, and I hope someone can help me solve it. (A small shape check is included after the code below.) My source code is as follows:

    import peft
    from datasets import load_dataset
    import json
    import torch
    from peft import LoraConfig
    from torch.utils.data import Dataset
    from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
    from transformers import Trainer
    from transformers import DataCollatorWithPadding
    
    
    class MyDataset(Dataset):
        def __init__(self,data_file,tokenizer,max_length=512):
            self.data = []
            self.tokenizer = tokenizer
            self.maxlen = max_length
    
    
            for i in data_file:
                with open(i, 'r') as f:
                    json_data = json.load(f)
                    for item in json_data:
                        instruction = item["instruction"]
                        input = item["input"]
                        output = item["output"]
    
                        combined_text = f"Instruction: {instruction}\nInput: {input}\nOutput: {output}"
                        tokenizer_data = tokenizer(
                            combined_text,
                            truncation = True,
                            padding = "max_length",
                            max_length = self.maxlen,
                            return_tensors = 'pt'
                        )
    
                        self.data.append(tokenizer_data)
    
        def __len__(self):
            return len(self.data)
    
        def __getitem__(self, item):
            return self.data[item]
    
    
    if __name__ == '__main__':
    
        lora_config = LoraConfig(
            r=8,
            lora_alpha=16,
            target_modules=["q_proj", "v_proj"],
            lora_dropout=0.1,
            bias="none",
        )
    
        train_data_file = ["../data/my_sw_ft_dataset.json","../data/my_code_ft_dataset.json"]
        test_data_file = ["../data/my_all_eval_dataset.json"]
        checkpoint = "/home/hpc/xyp/tinyllama"
    
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForCausalLM.from_pretrained(checkpoint)
    
        model = peft.get_peft_model(model,lora_config)
    
        training_args = TrainingArguments(
            output_dir="./results",  # output directory
            # gradient_accumulation_steps=16,  # gradient accumulation steps
            # learning_rate=1e-4,  # learning rate
            # num_train_epochs=3,  # number of training epochs
            # logging_dir="./logs",  # logging directory
            # logging_steps=10,
        )
    
        train_dataset = MyDataset(train_data_file,tokenizer)
        test_dataset = MyDataset(test_data_file,tokenizer)
        print(train_dataset[0]["input_ids"].shape)
        print(test_dataset[0]["attention_mask"].shape)
    
        data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
    
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            tokenizer=tokenizer,
            data_collator=data_collator,
        )
        trainer.train()
    
        model.save_pretrained("./lora_llama")
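
For reference, here is the small shape check mentioned above, run on a single dataset item (just a sketch reusing the objects defined in the script; the commented shapes are only what I expect from padding="max_length", max_length=512 and return_tensors='pt', so please correct me if that expectation is wrong):

    # Shape check on one item (sketch; the expected shapes are my assumption).
    item = train_dataset[0]
    print(item["input_ids"].shape)       # I expect torch.Size([1, 512])
    print(item["attention_mask"].shape)  # I expect torch.Size([1, 512])
    # Because of return_tensors='pt' each item seems to keep a leading dimension
    # of size 1; I am not sure whether that is related to the 5-D attention mask
    # in the error above.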