### System Info
I'm trying to train T5 model using HugggingFace trainer, but …I keep getting this error during the evaluation: `TypeError: argument 'ids': 'list' object cannot be interpreted as an integer`
This is the code for the `training arguments`:
```
training_args = Seq2SeqTrainingArguments(accelerator_config={'split_batches': False, 'even_batches': None, 'use_seedable_sampler': True},#org
output_dir="gen/"+args.mfn.replace("/", '_'),
overwrite_output_dir= True,
learning_rate=1e-4,
num_train_epochs=epochs,
load_best_model_at_end = True,
evaluation_strategy="epoch",
logging_strategy= "epoch",
save_strategy= "epoch",
#report_to="wandb",
do_train= True,
do_eval= True,
do_predict=False,#org
per_device_train_batch_size=int(args.batch_size),
per_device_eval_batch_size=2,
dataloader_pin_memory=False,#trying
#save_total_limit=10,
save_safetensors=True,
#gradient_accumulation_steps=8,
#fp16=True,
gradient_checkpointing=True,# Enable mixed precision
#eval_accumulation_steps=8,
#predict_with_generate=True,
generation_max_length=max_target_length,
generation_config= gen_conf)
```
And that's the code for the trainer:
```
trainer=Seq2SeqTrainer(
model=model,
args=training_args,
train_dataset=tokenized_datasets["train"],
eval_dataset=tokenized_datasets["validation"],
tokenizer=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics
)
```
And that's the code for the `compute_metrics` function
```
def compute_metrics(eval_preds):
#change the tokenizer model based on the model that you're using for prediction
tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/AraT5v2-base-1024", use_fast=True,revision='main')
preds, labels = eval_preds
if isinstance(preds, tuple):
preds = preds[0]
decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
if data_args.ignore_pad_token_for_loss:
# Replace -100 in the labels as we can't decode them.
labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
# Some simple post-processing
decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
scores1=[]
scores2=[]
scores_rouge_1=[]
scores_rouge_2=[]
scores_rouge_L=[]
model1 = SentenceTransformer("distiluse-base-multilingual-cased-v2")
model2= SentenceTransformer("embaas/sentence-transformers-e5-large-v2")
for ref, pred in zip(decoded_labels, decoded_preds):
#score = scorer.score(ref, pred)
score1 = cosine_sim(model1,ref, pred)
score2 = cosine_sim(model2,ref, pred)
rouge_score=rouge.compute(predictions=[df["preds"][i]], references=[df["ref"][i]], tokenizer=tokenizer.tokenize)#summy_tokenizer.to_words)
scores_rouge_1=rouge_score["rouge1"]
scores_rouge_2=rouge_score["rouge2"]
scores_rouge_L=rouge_score["rougeL"]
scores1.append(score1)
scores2.append(score2)
result={}
result["dist"]= sum(scores1) / len(scores1)
result["e5"]= sum(scores2) / len(scores2)
result["rouge_1"]=sum(scores_rouge_1) / len(scores_rouge_1)
result["rouge_2"]=sum(scores_rouge_2) / len(scores_rouge_2)
result["rouge_L"]=sum(scores_rouge_L) / len(scores_rouge_L)
print("***********","e5:",result["e5"],"dist:",result["dist"],"r1:",result["rouge_1"],"r2:",result["rouge_2"],"r_L",result["rouge_L"],"*************")
prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
result["gen_len"] = np.mean(prediction_lens)
result = {k: round(v, 4) for k, v in result.items()}
return result
```
I keep getting this error:
```
File "/home/ubuntu/Aml/fineTune.py", line 157, in main
trainer.train()
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/trainer.py", line 1859, in train
return inner_training_loop(
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/trainer.py", line 2298, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/trainer.py", line 2662, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/trainer_seq2seq.py", line 180, in evaluate
return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/trainer.py", line 3467, in evaluate
output = eval_loop(
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/trainer.py", line 3719, in evaluation_loop
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
File "/home/ubuntu/Aml/utils.py", line 82, in compute_metrics
decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3771, in batch_decode
return [
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3772, in <listcomp>
self.decode(
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/tokenization_utils_base.py", line 3811, in decode
return self._decode(
File "/home/ubuntu/Aml/venv/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py", line 625, in _decode
text = self._tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
TypeError: argument 'ids': 'list' object cannot be interpreted as an integer
```
Can anybody help me with that?
### Who can help?
@ArthurZucker and @younesbelkada
### Information
- [ ] The official example scripts
- [X] My own modified scripts
### Tasks
- [ ] An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)
- [ ] My own task or dataset (give details below)
### Reproduction
To repreduce this issue, just let me know and I will provide the whole script.
### Expected behavior
To repreduce this issue, just let me know and I will provide the whole script.