I’m working with a multinode environment where I’ve got 3 nodes and each node has multiple GPUs
When trying to compute metrics and return the bleu score for my task I get this error
[rank10]: File "/ext3/miniconda3/envs/venv/lib/python3.8/site-packages/transformers/trainer.py", line 3641, in evaluate
[rank10]: output = eval_loop(
[rank10]: File "/ext3/miniconda3/envs/venv/lib/python3.8/site-packages/transformers/trainer.py", line 3933, in evaluation_loop
[rank10]: metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item()
[rank10]: TypeError: 'NoneType' object does not support item assignment
This is my compute_metrics function
def compute_metrics(eval_pred, tokenizer):
rank = int(os.getenv('RANK', '0'))
world_size = 12
# Load the metric with the specified num_processes and process_id
metric = evaluate.load(
"sacrebleu",
num_process=12,
process_id=rank,
)
predictions, labels = eval_pred
decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
# Replace -100 in the labels as we can't decode them
labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
# Print sample translations and references for debugging
print("Sample translations: \n")
print(decoded_preds[:10])
print("References: ", decoded_labels[:10])
references = [[label] for label in decoded_labels]
# Compute the BLEU score
return metric.compute(predictions=decoded_preds, references=references)
Can someone help point out what to fix here?