So I followed the fast-whisper-finetuning using PEFT and I want to evaluate on my test dataset, but I get the following error:
return default_list ValueError: A custom logits processor of type <class ‘transformers.generation.logits_process.ForceTokensLogitsProcessor’> with values <transformers.generation.logits_process.ForceTokensLogitsProcessor object at 0x0000015389623160> has been passed to .generate()
, but it has already been created with the values <transformers.generation.logits_process.ForceTokensLogitsProcessor object at 0x0000015572CF9300>. <transformers.generation.logits_process.ForceTokensLogitsProcessor object at 0x0000015572CF9300> has been created by passing the corresponding arguments to generate or by the model’s config default values. If you just want to change the default values of logits processor consider passing them as arguments to .generate()
instead of using a custom logits processor.
with code:
eval_dataloader = DataLoader(test_dataset, batch_size=4, collate_fn=data_collator)
forced_decoder_ids = processor.get_decoder_prompt_ids(language=“Dutch”, task=“transcribe”)
normalizer = BasicTextNormalizer()
predictions =
references =
normalized_predictions =
normalized_references =
model.eval()
for step, batch in enumerate(tqdm(eval_dataloader)):
with torch.cuda.amp.autocast():
with torch.no_grad():
generated_tokens = (
model.generate(
input_features=batch[“input_features”].to(“cuda”),
forced_decoder_ids=forced_decoder_ids,
max_new_tokens=255,
)
.cpu()
.numpy()
)
labels = batch[“labels”].cpu().numpy()
labels = np.where(labels != -100, labels, processor.tokenizer.pad_token_id)
decoded_preds = processor.tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
decoded_labels = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)
predictions.extend(decoded_preds)
references.extend(decoded_labels)
normalized_predictions.extend([normalizer(pred).strip() for pred in decoded_preds])
normalized_references.extend([normalizer(label).strip() for label in decoded_labels])
del generated_tokens, labels, batch
gc.collect()
wer = 100 * metric.compute(predictions=predictions, references=references)
normalized_wer = 100 * metric.compute(predictions=normalized_predictions, references=normalized_references)
eval_metrics = {“eval/wer”: wer, “eval/normalized_wer”: normalized_wer}
print(f"{wer=} and {normalized_wer=}")
print(eval_metrics)