Hi all,
I am trying to train a custom model for multiclass NLP sequence classification and I cannot get it to train, for a reason I have not been able to pin down, which is why I am asking on this forum. I already looked at similar posts on the forum with no luck.
First of all, this is what my data looks like as a pandas DataFrame, before converting it into a Dataset (5 instances per label, with labels running from 0 to 251, so 252 labels in total):
text label
0 Configuración del área de selección de TV Set 0
1 Configuración del área de selección de TV Set 0
2 Conformación de la sección de selección de TV... 0
3 Conformación ae la stcción de seldcción de TV Set 0
4 Validar la configuración del área de selección... 0
... ... ...
1281 Validación incorrecta por identificador de art... 251
1282 Validación incorrecta mediante identificador d... 251
1283 Validación incorrecta por identificador de art... 251
1284 Validación incorrecta por identificador de art... 251
1285 Validar Validación incorrecta por identificado... 251
As it is a custom model, I changed the out_features value of out_proj in the classification head, so the resulting architecture looks like the following (a rough sketch of how I do this is included right after the dump):
RobertaForSequenceClassification(
(roberta): RobertaModel(
(embeddings): RobertaEmbeddings(
(word_embeddings): Embedding(50262, 1024, padding_idx=1)
(position_embeddings): Embedding(514, 1024, padding_idx=1)
(token_type_embeddings): Embedding(1, 1024)
(LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(encoder): RobertaEncoder(
(layer): ModuleList(
(0-23): 24 x RobertaLayer(
(attention): RobertaAttention(
(self): RobertaSdpaSelfAttention(
(query): Linear(in_features=1024, out_features=1024, bias=True)
(key): Linear(in_features=1024, out_features=1024, bias=True)
(value): Linear(in_features=1024, out_features=1024, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(output): RobertaSelfOutput(
(dense): Linear(in_features=1024, out_features=1024, bias=True)
(LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.0, inplace=False)
)
)
(intermediate): RobertaIntermediate(
(dense): Linear(in_features=1024, out_features=4096, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): RobertaOutput(
(dense): Linear(in_features=4096, out_features=1024, bias=True)
(LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.0, inplace=False)
)
)
)
)
)
(classifier): RobertaClassificationHead(
(dense): Linear(in_features=1024, out_features=1024, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
(out_proj): Linear(in_features=1024, out_features=252, bias=True)
)
)
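For reference, this is roughly how I change the head. The checkpoint name below is just a placeholder for the RoBERTa-large model I actually load, and I only touch the out_proj layer itself:

```python
import torch.nn as nn
from transformers import AutoModelForSequenceClassification

num_labels = 252

# Placeholder checkpoint name; in my code I load my own RoBERTa-large model here.
model = AutoModelForSequenceClassification.from_pretrained("roberta-large")

# Swap the final projection so it outputs one logit per label.
model.classifier.out_proj = nn.Linear(model.config.hidden_size, num_labels, bias=True)
```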
Then I use the following code to create a Hugging Face Dataset and split it:
from datasets import Dataset

dataset = Dataset.from_pandas(df, split='train')
dataset = dataset.train_test_split(shuffle=True, seed=42, test_size=0.2)
print(dataset)
The print gives the following result (I already checked that the label values go from 0 to N-1, where N is the number of classes; a sketch of that check is included after the output):
DatasetDict({
train: Dataset({
features: ['text', 'label'],
num_rows: 1028
})
test: Dataset({
features: ['text', 'label'],
num_rows: 258
})
})
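This is more or less the check I ran to make sure the labels stay inside the expected range (num_labels is assumed to be 252, matching the resized head above):

```python
# Gather every label from both splits and verify the expected range.
all_labels = dataset["train"]["label"] + dataset["test"]["label"]

num_labels = 252
assert min(all_labels) == 0
assert max(all_labels) == num_labels - 1
assert len(set(all_labels)) == num_labels  # each class appears at least once
```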
Despite having done all the remaining steps before training correctly (or so I believe), and having at least one instance per class in both the train and test splits, training fails as soon as it starts. A rough sketch of the rest of my setup (tokenization, LoRA config and Trainer) follows for reference.
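The sketch below is only an approximation of my actual code: the checkpoint name, the tokenization helper, the output directory and the LoRA/training hyperparameters are placeholders.

```python
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoTokenizer, Trainer, TrainingArguments

# Placeholder checkpoint name; it matches the model loaded earlier.
tokenizer = AutoTokenizer.from_pretrained("roberta-large")

def tokenize_fn(batch):
    # Pad/truncate so every example has the same length.
    return tokenizer(batch["text"], truncation=True, padding="max_length")

tokenized = dataset.map(tokenize_fn, batched=True)

# LoRA on the attention projections; hyperparameters here are only examples.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query", "value"],
)
modelo_peft = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir="salida",  # placeholder output directory
    num_train_epochs=3,
    per_device_train_batch_size=8,
)
trainer = Trainer(
    model=modelo_peft,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
)
```

When I then call trainer.train(), I get the following error: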
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[103], line 1
----> 1 trainer.train()
2 modelo_peft.to('cpu')
3 modelo_peft.eval()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:2238, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
2236 hf_hub_utils.enable_progress_bars()
2237 else:
-> 2238 return inner_training_loop(
2239 args=args,
2240 resume_from_checkpoint=resume_from_checkpoint,
2241 trial=trial,
2242 ignore_keys_for_eval=ignore_keys_for_eval,
2243 )
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:2582, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2575 context = (
2576 functools.partial(self.accelerator.no_sync, model=model)
2577 if i != len(batch_samples) - 1
2578 and self.accelerator.distributed_type != DistributedType.DEEPSPEED
2579 else contextlib.nullcontext
2580 )
2581 with context():
-> 2582 tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
2584 if (
2585 args.logging_nan_inf_filter
2586 and not is_torch_xla_available()
2587 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
2588 ):
2589 # if loss is nan or inf simply add the average of previous logged losses
2590 tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:3796, in Trainer.training_step(self, model, inputs, num_items_in_batch)
3793 return loss_mb.reduce_mean().detach().to(self.args.device)
3795 with self.compute_loss_context_manager():
-> 3796 loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
3798 del inputs
3799 if (
3800 self.args.torch_empty_cache_steps is not None
3801 and self.state.global_step % self.args.torch_empty_cache_steps == 0
3802 ):
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\trainer.py:3884, in Trainer.compute_loss(self, model, inputs, return_outputs, num_items_in_batch)
3882 kwargs["num_items_in_batch"] = num_items_in_batch
3883 inputs = {**inputs, **kwargs}
-> 3884 outputs = model(**inputs)
3885 # Save past state if it exists
3886 # TODO: this needs to be fixed and made cleaner later.
3887 if self.args.past_index >= 0:
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)
1771 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1772 else:
-> 1773 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1784, in Module._call_impl(self, *args, **kwargs)
1779 # If we don't have any hooks, we want to skip the rest of the logic in
1780 # this function, and just call forward.
1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1782 or _global_backward_pre_hooks or _global_backward_hooks
1783 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1784 return forward_call(*args, **kwargs)
1786 result = None
1787 called_always_called_hooks = set()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\peft\peft_model.py:1652, in PeftModelForSequenceClassification.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
1650 if peft_config.peft_type == PeftType.POLY:
1651 kwargs["task_ids"] = task_ids
-> 1652 return self.base_model(
1653 input_ids=input_ids,
1654 attention_mask=attention_mask,
1655 inputs_embeds=inputs_embeds,
1656 labels=labels,
1657 output_attentions=output_attentions,
1658 output_hidden_states=output_hidden_states,
1659 return_dict=return_dict,
1660 **kwargs,
1661 )
1663 batch_size = _get_batch_size(input_ids, inputs_embeds)
1664 if attention_mask is not None:
1665 # concat prompt attention mask
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)
1771 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1772 else:
-> 1773 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1784, in Module._call_impl(self, *args, **kwargs)
1779 # If we don't have any hooks, we want to skip the rest of the logic in
1780 # this function, and just call forward.
1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1782 or _global_backward_pre_hooks or _global_backward_hooks
1783 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1784 return forward_call(*args, **kwargs)
1786 result = None
1787 called_always_called_hooks = set()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\peft\tuners\tuners_utils.py:222, in BaseTuner.forward(self, *args, **kwargs)
221 def forward(self, *args: Any, **kwargs: Any):
--> 222 return self.model.forward(*args, **kwargs)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\models\roberta\modeling_roberta.py:1228, in RobertaForSequenceClassification.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1226 elif self.config.problem_type == "single_label_classification":
1227 loss_fct = CrossEntropyLoss()
-> 1228 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
1229 elif self.config.problem_type == "multi_label_classification":
1230 loss_fct = BCEWithLogitsLoss()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1773, in Module._wrapped_call_impl(self, *args, **kwargs)
1771 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1772 else:
-> 1773 return self._call_impl(*args, **kwargs)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\module.py:1784, in Module._call_impl(self, *args, **kwargs)
1779 # If we don't have any hooks, we want to skip the rest of the logic in
1780 # this function, and just call forward.
1781 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1782 or _global_backward_pre_hooks or _global_backward_hooks
1783 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1784 return forward_call(*args, **kwargs)
1786 result = None
1787 called_always_called_hooks = set()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\modules\loss.py:1310, in CrossEntropyLoss.forward(self, input, target)
1309 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1310 return F.cross_entropy(
1311 input,
1312 target,
1313 weight=self.weight,
1314 ignore_index=self.ignore_index,
1315 reduction=self.reduction,
1316 label_smoothing=self.label_smoothing,
1317 )
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\torch\nn\functional.py:3462, in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3460 if size_average is not None or reduce is not None:
3461 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3462 return torch._C._nn.cross_entropy_loss(
3463 input,
3464 target,
3465 weight,
3466 _Reduction.get_enum(reduction),
3467 ignore_index,
3468 label_smoothing,
3469 )
IndexError: Target 134 is out of bounds.
Any ideas about what might be wrong? Let me know if any other information is needed.
Thanks,
Javier