Hi guys,
I'm trying to fine-tune a wav2vec2 model with HuggingFace. Unfortunately, evaluation runs out of memory at the very end: the model produces its predictions just fine, but when the label IDs are decoded back to text and the WER is computed, the program crashes with a MemoryError inside jiwer's Levenshtein.editops:
***** Running Evaluation *****
  Num examples = 15588
  Batch size = 4
100%|███████████████████████████████████████| 3897/3897 [21:27<00:00, 3.07it/s]
Traceback (most recent call last):
  File "/tmp/pycharm_project_263/audioengine/model/finetuning/wav2vec2/finetune_parquet.py", line 151, in <module>
  File "/tmp/pycharm_project_263/audioengine/model/finetuning/wav2vec2/finetune_parquet.py", line 128, in main
    max_val_samples = data_args.max_val_samples if data_args.max_val_samples is not None else len(eval_dataset)
  File "/usr/local/lib/python3.8/dist-packages/transformers/trainer.py", line 1757, in evaluate
    output = self.prediction_loop(
  File "/usr/local/lib/python3.8/dist-packages/transformers/trainer.py", line 1930, in prediction_loop
    metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
  File "/tmp/pycharm_project_263/audioengine/model/finetuning/wav2vec2/wav2vec2_trainer.py", line 191, in __call__
    wer = wer_metric.compute(predictions=pred_str, references=label_str)
  File "/usr/local/lib/python3.8/dist-packages/datasets/metric.py", line 403, in compute
    output = self._compute(predictions=predictions, references=references, **kwargs)
  File "/home/warmachine/.cache/huggingface/modules/datasets_modules/metrics/wer/73b2d32b723b7fb8f204d785c00980ae4d937f12a65466f8fdf78706e2951281/wer.py", line 94, in _compute
    return wer(references, predictions)
  File "/usr/local/lib/python3.8/dist-packages/jiwer/measures.py", line 80, in wer
    measures = compute_measures(
  File "/usr/local/lib/python3.8/dist-packages/jiwer/measures.py", line 192, in compute_measures
    H, S, D, I = _get_operation_counts(truth, hypothesis)
  File "/usr/local/lib/python3.8/dist-packages/jiwer/measures.py", line 273, in _get_operation_counts
    editops = Levenshtein.editops(source_string, destination_string)
MemoryError
100%|███████████████████████████████████████| 3897/3897 [21:40<00:00, 3.00it/s]
Process finished with exit code 1
The WER is computed in this compute_metrics closure (the __call__ in wav2vec2_trainer.py from the traceback):

import numpy as np
from datasets import load_metric

wer_metric = load_metric("wer")


def compute_metrics(processor):
    def __call__(pred):
        # greedy decoding: pick the most likely token at every frame
        pred_logits = pred.predictions
        pred_ids = np.argmax(pred_logits, axis=-1)

        # -100 is only meaningful to the loss; restore the pad token before decoding
        pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id

        pred_str = processor.batch_decode(pred_ids)
        # we do not want to group tokens when computing the metrics
        label_str = processor.batch_decode(pred.label_ids, group_tokens=False)

        wer = wer_metric.compute(predictions=pred_str, references=label_str)
        return {"wer": wer}

    return __call__
...
trainer = Trainer(  # customized Trainer from wav2vec2_trainer.py; the stock one has no train_seq_lengths kwarg
    model=model,
    data_collator=data_collator,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=processor.feature_extractor,
    train_seq_lengths=train_dataset.input_seq_lengths,
    compute_metrics=compute_metrics(processor),
)