Hi all,
I’m currently training a SeqToSeq model using the Trainer
class. When running the training loop everything works fine, but when reaching the evaluation loop I get the following error:
Traceback (most recent call last):
File "/home/vdm/SLAM-ASR/src/training.py", line 109, in <module>
main()
File "/home/vdm/SLAM-ASR/src/training.py", line 105, in main
trainer.train()
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer.py", line 1624, in train
return inner_training_loop(
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer.py", line 2029, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer.py", line 2412, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer.py", line 3229, in evaluate
output = eval_loop(
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer.py", line 3452, in evaluation_loop
preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 123, in nested_concat
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 123, in <genexpr>
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 123, in nested_concat
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 123, in <genexpr>
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 123, in nested_concat
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 123, in <genexpr>
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 125, in nested_concat
return torch_pad_and_concatenate(tensors, new_tensors, padding_index=padding_index)
File "/home/vdm/.pyenv/versions/3.10.13/envs/SLAM-ASR/lib/python3.10/site-packages/transformers/trainer_pt_utils.py", line 84, in torch_pad_and_concatenate
return torch.cat((tensor1, tensor2), dim=0)
RuntimeError: Sizes of tensors must match except in dimension 0. Expected size 229 but got size 211 for tensor number 1 in the list.
In order to debug I have printed the shape of my logits, my labels and my shifted logits (used to compute the loss). Here are the logs:
outputs.logits.shape=torch.Size([8, 229, 51200]) shift_logits.shape=torch.Size([488, 51200]) shift_labels.shape=torch.Size([488])
outputs.logits.shape=torch.Size([8, 211, 51200]) shift_logits.shape=torch.Size([416, 51200]) shift_labels.shape=torch.Size([416])
It’s as if the evaluation loop expected the output sequences to have the same length across all samples, but if that were the case, why would the training loop work but not the eval one?
I’m really confused about it, so if anyone has an idea, I would love to hear it.
Source Code
data processing
def add_raw_speech_feature_to_dataset(batch, processor):
    """Attach processed audio features and tokenized labels to one example.

    Args:
        batch: Mapping with an "audio" entry (holding "array" and
            "sampling_rate") and a "text" entry. It is modified in place.
        processor: Callable used twice: once on the raw audio array (its
            result must expose ``input_values``) and once with ``text=...``
            (its result must expose ``input_ids``).

    Returns:
        The same ``batch`` object, with "input_values", "input_length" and
        "labels" added.
    """
    audio = batch["audio"]
    audio_features = processor(
        audio["array"],
        sampling_rate=audio["sampling_rate"],
    )
    # Only the first entry of the processed values is kept for this example.
    input_values = audio_features.input_values[0]
    batch["input_values"] = input_values
    batch["input_length"] = len(input_values)

    # The transcript is capitalized and terminated with a period before
    # being tokenized.
    transcript = batch["text"].capitalize() + "."
    batch["labels"] = processor(text=transcript).input_ids
    return batch
Dataloader
@dataclass
class DataCollator:
    """Pad a list of examples into one batch of inputs and labels.

    Attributes are documented inline below. Calling the collator returns the
    padded input batch produced by ``processor.pad`` with an extra "labels"
    entry.
    """

    # Processor whose ``pad`` method is used for both the inputs and the labels.
    processor: Wav2Vec2Processor
    # Forwarded unchanged as the ``padding`` argument of ``processor.pad``.
    padding: Union[bool, str] = True

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        """Collate ``features`` (each with "input_values" and "labels")."""
        # Inputs and labels have different lengths and need different
        # padding methods, so they are padded in two separate calls.
        audio_entries = [{"input_values": example["input_values"]} for example in features]
        label_entries = [{"input_ids": example["labels"]} for example in features]

        batch = self.processor.pad(
            audio_entries,
            padding=self.padding,
            return_tensors="pt",
        )
        padded_labels = self.processor.pad(
            labels=label_entries,
            padding=self.padding,
            return_tensors="pt",
        )

        # Positions whose attention mask is not 1 are overwritten with -100
        # (presumably so padded label positions are ignored by the loss;
        # confirm against the model's loss function).
        not_attended = padded_labels.attention_mask.ne(1)
        batch["labels"] = padded_labels["input_ids"].masked_fill(not_attended, -100)
        return batch