Hello, I am facing the same issues. Here is a minimal code that is failing.
import librosa
import torch
from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor, Wav2Vec2FeatureExtractor, Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM
from transformers import DataCollatorForTokenClassification
from transformers import Trainer, TrainingArguments
from datasets import load_dataset, load_metric
import pdb
# load tokenizer and model # see the doc here: https://huggingface.co/docs/transformers/model_doc/wav2vec2#wav2vec2
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
# load dataset and more
dataset =load_dataset('hf-internal-testing/librispeech_asr_dummy','clean')
data_collator = DataCollatorForTokenClassification(tokenizer=processor.feature_extractor, padding=True)
wer_metric = load_metric("wer")
# training
training_args = TrainingArguments(output_dir=f"wav2vec2")
trainer = Trainer(
model=model,
data_collator=data_collator,
args=training_args,
train_dataset=dataset['validation'],
tokenizer=processor.feature_extractor
)
trainer.train()
And here is the error message:
Any idea of what is wrong?