My training data structure is:
Dataset({
    features: ['input_values', 'input_length', 'labels'],
    num_rows: 100
})
where input_values is the vector representation generated from the audio, and labels is the vector representation of my transcription.
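To confirm that every row really carries these features, the dataset can be inspected directly. This is a minimal sketch, assuming the dataset variable is named common_voice_train as in the Trainer setup further down:

# Sanity check on the training data (assumes the dataset variable is
# named common_voice_train, as in the Trainer setup below).
sample = common_voice_train[0]
print(sample.keys())                # expect: input_values, input_length, labels
print(len(sample["input_values"]))  # length of the audio feature vector for this row
print(sample["labels"][:10])        # first few label token ids

# Rows whose "input_values" is missing or None are the ones that would make
# feature.get("input_values") return None inside the collator.
bad_rows = [i for i in range(len(common_voice_train))
            if common_voice_train[i].get("input_values") is None]
print("rows without input_values:", bad_rows)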
This is my padding class, where I get an error when accessing this line ( input_values = feature.get("input_values") # safely get the value of "input_values" ). The error only occurs when I train the model:
import torch
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
from transformers import Wav2Vec2Processor


@dataclass
class DataCollatorCTCWithPadding:  # class name assumed; the definition line was missing from the snippet
    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    max_length_labels: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # split inputs and labels since they have to be of different lengths
        # and need different padding methods
        input_features = []
        for feature in features:
            input_values = feature.get("input_values")  # safely get the value of "input_values"
            if input_values is not None:
                input_features.append({"input_values": input_values})
        # inpy = [{"input_values": give(feature["labels"])} for feature in features]
        label_features = [{"input_ids": feature["labels"]} for feature in features]

        # pad the audio inputs with the feature extractor
        batch = self.processor.pad(
            input_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # pad the label token ids with the tokenizer
        with self.processor.as_target_processor():
            labels_batch = self.processor.pad(
                label_features,
                padding=self.padding,
                max_length=self.max_length_labels,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # replace padding with -100 to ignore it correctly in the loss
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
        batch["labels"] = labels

        # print("Batch input:", batch["input_values"])
        # print("Batch labels:", batch["labels"])

        return batch
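Before handing the collator to the Trainer, it can also be called directly on a couple of rows to reproduce the problem in isolation. A minimal sketch, assuming the class name DataCollatorCTCWithPadding and the dataset name common_voice_train from above:

# Build the collator the same way it will be used by the Trainer.
data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)

# Take two raw rows and run them through __call__ exactly as the Trainer would.
sample_features = [common_voice_train[i] for i in range(2)]
batch = data_collator(sample_features)

print(batch["input_values"].shape)  # (2, longest audio sequence in the batch)
print(batch["labels"].shape)        # (2, longest label sequence in the batch)
print(batch["labels"][0])           # padded positions should be -100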
This is my Trainer setup:
from transformers import Trainer

trainer = Trainer(
    model=model,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=common_voice_train,
    eval_dataset=common_voice_train,
    tokenizer=processor.feature_extractor,
)
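The training_args object is referenced above but not shown. For context, it could look roughly like the sketch below, and trainer.train() is the call that triggers the error described earlier; all concrete values here are placeholders, not taken from the question:

from transformers import TrainingArguments

# Hypothetical TrainingArguments; the real values are not shown in the question.
training_args = TrainingArguments(
    output_dir="./wav2vec2-finetuned",  # placeholder output directory
    per_device_train_batch_size=4,
    num_train_epochs=5,
    learning_rate=1e-4,
    remove_unused_columns=False,        # pass all dataset columns through to the data collator
    logging_steps=10,
)

trainer.train()  # training is where the error described above appears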