Error while training Mixtral in 8bit

Hi, when I try to train my model in 8-bit I get the error shown below. My code is as follows:

import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer

MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
    ],
    task_type="CAUSAL_LM",
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    load_in_8bit=True,
    device_map="auto",
    use_flash_attention_2=True,
    torch_dtype=torch.bfloat16,
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

LEN = 2500
args = TrainingArguments(
    output_dir=MODEL_NAME + "_" + str(LEN) + "_ner",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    warmup_steps=0.04,
    logging_steps=100,
    # save_strategy="epoch",
    save_steps=1000,
    learning_rate=2e-5,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    gradient_accumulation_steps=8,
    bf16=True,
    # evaluation_strategy="epoch",
    eval_steps=1000,
    # gradient_checkpointing=True,
    save_total_limit=2,
)
trainer = SFTTrainer(
    model=model,
    max_seq_length=LEN,
    tokenizer=tokenizer,
    args=args,
    peft_config=peft_config,
    train_dataset=data_train,
    eval_dataset=data_val,
    dataset_text_field="text",
    packing=False,
    neftune_noise_alpha=5,
)
trainer.train()

The error I get is as follows.

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[22], line 1
----> 1 trainer.train()

File ~/anaconda3/envs/moe/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:315, in SFTTrainer.train(self, *args, **kwargs)
    312 if self.neftune_noise_alpha is not None and not self._trainer_supports_neftune:
    313     self.model = self._trl_activate_neftune(self.model)
--> 315 output = super().train(*args, **kwargs)
    317 # After training we make sure to retrieve back the original forward pass method
    318 # for the embedding layer by removing the forward post hook.
    319 if self.neftune_noise_alpha is not None and not self._trainer_supports_neftune:

File ~/anaconda3/envs/moe/lib/python3.11/site-packages/transformers/trainer.py:1537, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1535     hf_hub_utils.enable_progress_bars()
   1536 else:
-> 1537     return inner_training_loop(
   1538         args=args,
   1539         resume_from_checkpoint=resume_from_checkpoint,
   1540         trial=trial,
   1541         ignore_keys_for_eval=ignore_keys_for_eval,
   1542     )

File …/transformers/trainer.py:1854, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1851 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)

    486     )
    487     grad_A = torch.matmul(grad_output, CB).view(ctx.grad_shape).to(ctx.dtype_A)
    488 else:

RuntimeError: The size of tensor a (32) must match the size of tensor b (8) at non-singleton dimension 0

Thanks in advance.