RuntimeError Traceback (most recent call last)
Cell In[29], line 2
1 # Train model
----> 2 trainer.train()
4 # # Start training from the last checkpoint
5 # trainer.train(resume_from_checkpoint=checkpoint)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/trainer.py:2245, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
2243 hf_hub_utils.enable_progress_bars()
2244 else:
→ 2245 return inner_training_loop(
2246 args=args,
2247 resume_from_checkpoint=resume_from_checkpoint,
2248 trial=trial,
2249 ignore_keys_for_eval=ignore_keys_for_eval,
2250 )
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/trainer.py:2556, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2549 context = (
2550 functools.partial(self.accelerator.no_sync, model=model)
2551 if i != len(batch_samples) - 1
2552 and self.accelerator.distributed_type != DistributedType.DEEPSPEED
2553 else contextlib.nullcontext
2554 )
2555 with context():
→ 2556 tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
2558 if (
2559 args.logging_nan_inf_filter
2560 and not is_torch_xla_available()
2561 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
2562 ):
2563 # if loss is nan or inf simply add the average of previous logged losses
2564 tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/trainer.py:3718, in Trainer.training_step(self, model, inputs, num_items_in_batch)
3715 return loss_mb.reduce_mean().detach().to(self.args.device)
3717 with self.compute_loss_context_manager():
→ 3718 loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
3720 del inputs
3721 if (
3722 self.args.torch_empty_cache_steps is not None
3723 and self.state.global_step % self.args.torch_empty_cache_steps == 0
3724 ):
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/trainer.py:3783, in Trainer.compute_loss(self, model, inputs, return_outputs, num_items_in_batch)
3781 loss_kwargs["num_items_in_batch"] = num_items_in_batch
3782 inputs = {**inputs, **loss_kwargs}
→ 3783 outputs = model(**inputs)
3784 # Save past state if it exists
3785 # TODO: this needs to be fixed and made cleaner later.
3786 if self.args.past_index >= 0:
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
→ 1739 return self._call_impl(*args, **kwargs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/peft/peft_model.py:1756, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
1754 with self._enable_peft_forward_hooks(**kwargs):
1755 kwargs = {k: v for k, v in kwargs.items() if k not in self.special_peft_forward_args}
→ 1756 return self.base_model(
1757 input_ids=input_ids,
1758 attention_mask=attention_mask,
1759 inputs_embeds=inputs_embeds,
1760 labels=labels,
1761 output_attentions=output_attentions,
1762 output_hidden_states=output_hidden_states,
1763 return_dict=return_dict,
1764 **kwargs,
1765 )
1767 batch_size = _get_batch_size(input_ids, inputs_embeds)
1768 if attention_mask is not None:
1769 # concat prompt attention mask
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
→ 1739 return self._call_impl(*args, **kwargs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
→ 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:193, in BaseTuner.forward(self, *args, **kwargs)
192 def forward(self, *args: Any, **kwargs: Any):
→ 193 return self.model.forward(*args, **kwargs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/accelerate/hooks.py:176, in add_hook_to_module..new_forward(module, *args, **kwargs)
174 output = module._old_forward(*args, **kwargs)
175 else:
→ 176 output = module._old_forward(*args, **kwargs)
177 return module._hf_hook.post_forward(module, output)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/utils/deprecation.py:172, in deprecate_kwarg..wrapper..wrapped_func(*args, **kwargs)
168 elif minimum_action in (Action.NOTIFY, Action.NOTIFY_ALWAYS) and not is_torchdynamo_compiling():
169 # DeprecationWarning is ignored by default, so we use FutureWarning instead
170 warnings.warn(message, FutureWarning, stacklevel=2)
→ 172 return func(*args, **kwargs)
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py:874, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, logits_to_keep, **kwargs)
872 loss = None
873 if labels is not None:
→ 874 loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
876 if not return_dict:
877 output = (logits,) + outputs[1:]
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/loss/loss_utils.py:56, in ForCausalLMLoss(logits, labels, vocab_size, num_items_in_batch, ignore_index, shift_labels, **kwargs)
54 # Enable model parallelism
55 shift_labels = shift_labels.to(logits.device)
→ 56 loss = fixed_cross_entropy(logits, shift_labels, num_items_in_batch, ignore_index, **kwargs)
57 return loss
File ~/anaconda3/envs/python3/lib/python3.10/site-packages/transformers/loss/loss_utils.py:29, in fixed_cross_entropy(source, target, num_items_in_batch, ignore_index, **kwargs)
27 loss = nn.functional.cross_entropy(source, target, ignore_index=ignore_index, reduction=reduction)
28 if reduction == "sum":
→ 29 loss = loss / num_items_in_batch
30 return loss
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:7 and cuda:0!
============================================================
Code which I used:
============================================================
```python
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
)
import torch

# Clear GPU memory
torch.cuda.empty_cache()

MODEL_NAME = "meta-llama/Llama-3.1-70B"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    # torch_dtype=torch.bfloat16,  # using bf16 precision
    device_map="auto",
    cache_dir="/home/ec2-user/SageMaker/huggingface_cache",
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    cache_dir="/home/ec2-user/SageMaker/huggingface_cache",
)
tokenizer.pad_token = tokenizer.eos_token

from trl import SFTTrainer, setup_chat_format

model, tokenizer = setup_chat_format(model, tokenizer)

def prepare_text_column(examples):
    return {
        "text": [
            tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
            for messages in examples["messages"]
        ]
    }

def tokenize_function(examples):
    # Don't send to device during tokenization - placement is handled by the trainer
    tokenized = tokenizer(examples["text"], padding=True, truncation=True, max_length=2048, return_tensors="pt")
    tokenized["labels"] = tokenized["input_ids"].clone()
    return tokenized

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

trainer = Trainer(
    model=model,
    args=training_args,             # training_args defined earlier in the notebook (not shown)
    train_dataset=tokenized_train,  # datasets prepared with the functions above (not shown)
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
)

trainer.train()
```
Please help me. Thanks!
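Edit: in case it helps, here is a small snippet I can run right after loading the model to see how device_map="auto" sharded the layers across the GPUs (the error mentions cuda:7 and cuda:0, so I wanted to check which modules sit on which device). This is just a debugging sketch; it assumes the hf_device_map attribute is populated by accelerate when a device_map is used, which I believe it is:

```python
# Debugging sketch: print the device placement chosen by device_map="auto".
# hf_device_map should be set by accelerate when the model is loaded with a device_map.
if hasattr(model, "hf_device_map"):
    for module_name, device in model.hf_device_map.items():
        print(f"{module_name}: {device}")
else:
    print("No hf_device_map attribute - model was not sharded by accelerate.")
```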