Error when fine-tuning a T5 model for a Seq2Seq translation task

compute_metrics function throws an error at np.argmax

Hi,

I am trying to translate text from one semantic language to another. I am using transformers version 4.19.2 and datasets version 2.2.1.

The dataset is an Excel (.xlsx) file with two columns, "Semantic1" and "Semantic2 Syntax", which the code renames to "semantic1" and "semantic2". The following is the code that led to the error; if anyone can help identify my mistake, please let me know :grinning:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import datasets
from datasets import Dataset
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import DataCollatorForSeq2Seq
from datasets import load_metric

data_path = "./../../datasets/dir_parse_data.xlsx"
conversion_df = pd.read_excel(data_path, sheet_name='Sheet1', dtype=str)
conversion_df = conversion_df.astype(str)  # astype returns a new DataFrame, so assign it back
conversion_df.rename(columns={'Semantic1':'semantic1','Semantic2 Syntax':'semantic2'}, inplace=True)
conversion_df[['semantic1', 'semantic2']] = conversion_df[['semantic1', 'semantic2']].astype(str)

train_df, test_df = train_test_split(conversion_df, test_size=0.2)

train_df.head()

train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)
conversion_dataset_dict = datasets.DatasetDict({"train":train_dataset,"test":test_dataset})

tokenizer = AutoTokenizer.from_pretrained("t5-base")
source_sql_key = "semantic1"
target_sql_key = "semantic2"
prefix = "translate semantic1 to semantic2: "


def preprocess_function(conv_dataset):
    inputs = [prefix + conv_data for conv_data in conv_dataset[source_sql_key]]
    targets = [conv_data for conv_data in conv_dataset[target_sql_key]]
    model_inputs = tokenizer(inputs, max_length=500, truncation=True, padding=True)

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=500, truncation=True, padding=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_conv_data = conversion_dataset_dict.map(preprocess_function, batched=True)

model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", return_dict=True)
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    print("logits - ", type(logits))
    print("labels - ", type(labels))
    predictions = np.argmax(logits, axis=-1)
    print("predictions - ", predictions)
    return metric.compute(predictions=predictions, references=labels)
	


training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    fp16=False,
    no_cuda=True,
    report_to="none"
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_conv_data["train"],
    eval_dataset=tokenized_conv_data["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Can someone please help with the issue below? I get the following error only after the first epoch completes, when trainer.evaluate() runs:

"Could not broadcast input array from shape (240, 500, 32128) into shape (240, 500)"

(240 is the size of my eval set, 500 is the max_length I tokenize to, and 32128 is the T5 vocabulary size.)

Here is the full stack trace:

===========================================Stack Start===================================================================
logits - <class 'tuple'>
labels - <class 'numpy.ndarray'>

ValueError Traceback (most recent call last)
C:\Users\D00365~1\AppData\Local\Temp/ipykernel_14552/4032920361.py in <module>
----> 1 trainer.train()

D:\Anaconda3\lib\site-packages\transformers\trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1315 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1316 )
--> 1317 return inner_training_loop(
1318 args=args,
1319 resume_from_checkpoint=resume_from_checkpoint,

D:\Anaconda3\lib\site-packages\transformers\trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1642
1643 self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
--> 1644 self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
1645
1646 if DebugOption.TPU_METRICS_DEBUG in self.args.debug:

D:\Anaconda3\lib\site-packages\transformers\trainer.py in _maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)
1794 metrics = None
1795 if self.control.should_evaluate:
--> 1796 metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
1797 self._report_to_hp_search(trial, epoch, metrics)
1798

D:\Anaconda3\lib\site-packages\transformers\trainer_seq2seq.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix, max_length, num_beams)
68 self._max_length = max_length if max_length is not None else self.args.generation_max_length
69 self._num_beams = num_beams if num_beams is not None else self.args.generation_num_beams
--> 70 return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
71
72 def predict(

D:\Anaconda3\lib\site-packages\transformers\trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
2456
2457 eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
--> 2458 output = eval_loop(
2459 eval_dataloader,
2460 description="Evaluation",

D:\Anaconda3\lib\site-packages\transformers\trainer.py in evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
2738 )
2739 else:
--> 2740 metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
2741 else:
2742 metrics = {}

C:\Users\D00365~1\AppData\Local\Temp/ipykernel_14552/588117875.py in compute_metrics(eval_pred)
3 print("logits - ", type(logits))
4 print("labels - ", type(labels))
----> 5 predictions = np.argmax(logits, axis=-1)
6 print("predictions - ", predictions)
7 return metric.compute(predictions=predictions, references=labels)

ValueError: could not broadcast input array from shape (240, 500, 32128) into shape (240, 500)

==========================================Stack End================================================================================

My compute_metrics function and the trainer setup are shown in the code above.

When I printed the types, logits came through as a tuple while labels was a numpy.ndarray:

logits - <class 'tuple'>
labels - <class 'numpy.ndarray'>
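
A quick way to inspect what the tuple actually contains is to print the shape of each element inside compute_metrics (a small debugging sketch, not part of my original script):

if isinstance(logits, tuple):
    # Print the shape of every element; with T5 the entries can have
    # different shapes (e.g. LM logits vs. encoder hidden states),
    # which is why np.argmax over the whole tuple fails.
    for i, item in enumerate(logits):
        print(f"logits[{i}]:", getattr(item, "shape", type(item)))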

Hi,

I recently struggled with a similar issue for the exact same implementation of compute_metrics. I believe it's related to this issue, which in my case was caused by logits containing two numpy.ndarray objects of different shapes.
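
If that is what is happening here too, one workaround is to keep only the first element of the tuple before the argmax. A minimal sketch, assuming logits[0] holds the language-model logits of shape (batch, seq_len, vocab_size):

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Assumption: logits[0] is the LM logits; the remaining entries
    # (e.g. the encoder's last hidden state) have different shapes
    # and break np.argmax over the tuple as a whole.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = np.argmax(logits, axis=-1)
    # Note: for token-level accuracy you may also need to flatten the
    # arrays and mask out the -100 padding positions in the labels.
    return metric.compute(predictions=predictions, references=labels)

Alternatively, since this is a Seq2SeqTrainer, setting predict_with_generate=True in Seq2SeqTrainingArguments makes the trainer pass generated token IDs rather than raw logits to compute_metrics, and, if I remember correctly, recent transformers versions (including 4.19) also accept a preprocess_logits_for_metrics callable on the Trainer so the logits can be reduced before they are accumulated.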

Thank you @ZeroMaster28. I have got it corrected.

Hi @aidev,

What’s the fix for this issue?