I’ve been trying to get this snippet to run for days but keep running into the same error so I’ve come here for help. I know there exists a Wav2Vec2-Bert model but I’m trying to learn how to finetune/train a model for educational purposes. I’ve tried setting “padding = True” everywhere, tried to batch them, tried setting “dispatch_batches=False” and “split_batches=True” but no matter what I do, I always get the tensor error when it tries to compute any metrics. Any help is appreciated! (I’m using transformers-4.39.3, tokenizers-0.15.2, and accelerate-0.29.2)
from transformers import AutoTokenizer, Wav2Vec2Processor, SpeechEncoderDecoderModel
from datasets import load_dataset, load_metric, Dataset
from transformers import Trainer, TrainingArguments
import torch
import numpy as np
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
encoder_id = “facebook/wav2vec2-base-960h”
decoder_id = “google-bert/bert-base-uncased”
processor = Wav2Vec2Processor.from_pretrained(encoder_id)
tokenizer = AutoTokenizer.from_pretrained(decoder_id)
model = SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(encoder_id, decoder_id)
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.pad_token_id = tokenizer.pad_token_id
num_samples = 10
dataset = load_dataset(“librispeech_asr”, split=“train.clean.100”, streaming = True, trust_remote_code=True)
das = list(dataset.take(num_samples))
ds = Dataset.from_dict({‘audio’: [da[‘audio’] for da in das], ‘text’: [da[‘text’] for da in das]})
#ds = load_dataset(“hf-internal-testing/librispeech_asr_demo”, “clean”, split=“validation”)
wer_metric = load_metric(“wer”)
def compute_metrics(pred):
pred_logits = pred.predictions
pred_ids = torch.argmax(pred_logits, dim=-1)
pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id
pred_str = processor.batch_decode(pred_ids, group_tokens = False)
label_str = processor.batch_decode(pred.label_ids, group_tokens=False)
wer = wer_metric.compute(predictions=pred_str, references=label_str)
return {"wer": wer}
@dataclass
class DataCollatorCTCWithPadding:
processor: Wav2Vec2Processor
padding: Union[bool, str] = True
def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
input_features = [{"input_values": feature["input_values"]} for feature in features]
label_features = [{"input_ids": feature["labels"]} for feature in features]
batch = self.processor.pad(
input_features,
padding=self.padding,
return_tensors="pt",
)
with self.processor.as_target_processor():
labels_batch = self.processor.pad(
label_features,
padding=self.padding,
return_tensors="pt",
)
labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
batch["labels"] = labels
return batch
data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)
def prepare_dataset(batch):
audio = batch[“audio”]
batch[“input_values”] = processor(audio[“array”], sampling_rate=audio[“sampling_rate”]).input_values[0]
batch[“input_length”] = len(batch[“input_values”])
with processor.as_target_processor():
batch[“labels”] = processor(batch[“text”]).input_ids
return batch
train_dataset = ds.map(prepare_dataset, remove_columns=ds.column_names)
for param in model.encoder.parameters():
param.requires_grad = False
training_args = TrainingArguments(
output_dir=“YousifTest”,
per_device_train_batch_size=4,
split_batches=True,
gradient_accumulation_steps=2,
evaluation_strategy=“epoch”,
num_train_epochs=1,
gradient_checkpointing=True,
fp16=True,
save_steps=400,
eval_steps=1,
logging_steps=1,
learning_rate=3e-4,
save_total_limit=2,
max_steps=24,
)
trainer = Trainer(
model=model,
data_collator=data_collator,
args=training_args,
compute_metrics=compute_metrics,
train_dataset=train_dataset,
eval_dataset=train_dataset,
)
trainer.train()
trainer.save_model(“Test2”)
Traceback (most recent call last): | 0/2 [00:00<?, ?it/s]
File “c:\Users\Yousif\Documents\University\Assignments\Thesis\Work Files\Transformers\Trainer.py”, line 116, in
trainer.train()
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer.py”, line 1780, in train
return inner_training_loop(
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer.py”, line 2213, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer.py”, line 2577, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer.py”, line 3365, in evaluate
output = eval_loop(
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer.py”, line 3580, in evaluation_loop
preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 138, in nested_concat
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 138, in
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 138, in nested_concat
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 138, in
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 138, in nested_concat
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 138, in
return type(tensors)(nested_concat(t, n, padding_index=padding_index) for t, n in zip(tensors, new_tensors))
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 140, in nested_concat
return torch_pad_and_concatenate(tensors, new_tensors, padding_index=padding_index)
File “C:\Users\Yousif\anaconda3\lib\site-packages\transformers\trainer_pt_utils.py”, line 99, in torch_pad_and_concatenate
return torch.cat((tensor1, tensor2), dim=0)
RuntimeError: Sizes of tensors must match except in dimension 0. Expected size 227 but got size 214 for tensor number 1 in the list.