Hi there,
I need help with an issue I am facing while using T5-base via AutoModelForSeq2SeqLM. I am providing my code along with the error for reference.
Code:-
import random
import numpy as np
import torch
import pandas as pd
import json
import transformers
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
from trl import RewardTrainer, SFTTrainer
from datasets import Dataset
from transformers import (
    Trainer,
    TrainingArguments,
    default_data_collator,
    DataCollatorForLanguageModeling
)
device="cuda"
df = pd.read_parquet("/raid/ganesh/vishak/pranav/ss2113/train_rlhf.parquet")
model = AutoModelForSeq2SeqLM.from_pretrained("summarization_policy_new/").to(device)
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base", truncation=True, max_length=256, padding="max_length")
# Sanity check: generate a summary for a single prompt
text = df.iloc[2]["prompt"]
tokenized_text = tokenizer(text, return_tensors="pt", max_length=256).to(device)
tokenizer.decode(model.generate(**tokenized_text)[0])
df = pd.read_parquet("/raid/ganesh/vishak/pranav/ss2113/test_summ.parquet")
df = df[:10]
raw_dataset = Dataset.from_pandas(df)
raw_dataset
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-base",truncation=True, max_length=256, padding="max_length")
model = AutoModelForSeq2SeqLM.from_pretrained("/raid/ganesh/vishak/pranav/ss2113/summarization_policy_new").to(device)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Tokenize prompt+chosen and prompt+rejected pairs into the columns RewardTrainer expects
def formatting_func(examples):
    kwargs = {"padding": "max_length",
              "truncation": True,
              "max_length": 256,
              "return_tensors": "pt"
              }
    prompt_plus_chosen_response = examples["prompt"] + "\n" + examples["chosen"]
    prompt_plus_rejected_response = examples["prompt"] + "\n" + examples["rejected"]
    tokens_chosen = tokenizer.encode_plus(prompt_plus_chosen_response, **kwargs)
    tokens_rejected = tokenizer.encode_plus(prompt_plus_rejected_response, **kwargs)
    return {
        "input_ids_chosen": tokens_chosen["input_ids"][0], "attention_mask_chosen": tokens_chosen["attention_mask"][0],
        "input_ids_rejected": tokens_rejected["input_ids"][0], "attention_mask_rejected": tokens_rejected["attention_mask"][0]
    }
formatted_dataset = raw_dataset.map(formatting_func)
formatted_dataset = formatted_dataset.train_test_split()
### Loading the TRL reward trainer and training the trainer
training_args = TrainingArguments(
        output_dir="t5_rm_checkpoint/",
        num_train_epochs=1,
        logging_steps=10,
        gradient_accumulation_steps=1,
        save_strategy="steps",
        evaluation_strategy="steps",
        per_device_train_batch_size=2,
        per_device_eval_batch_size=1,
        eval_accumulation_steps=1,
        eval_steps=500,
        save_steps=500,
        warmup_steps=100,
        logging_dir="./logs",
        learning_rate=1e-5,
        save_total_limit=1,
        no_cuda=True
    )
trainer = RewardTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset['train'],
    eval_dataset=formatted_dataset['test'],
    args=training_args,
)
trainer.train()
trainer.save_model("t5_rm_model/")
Error:- 
Traceback (most recent call last):
  File "/raid/ganesh/vishak/pranav/ss2113/rlhf_reward_part2.py", line 81, in <module>                                                                                itb-dgx4.iitb.ac.in" 14:05 06-May-24
    trainer.train()
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/transformers/trainer.py", line 1591, in train
    return inner_training_loop(
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/transformers/trainer.py", line 1892, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/transformers/trainer.py", line 2776, in training_step
    loss = self.compute_loss(model, inputs)
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/trl/trainer/reward_trainer.py", line 228, in compute_loss
    rewards_chosen = model(
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1746, in forward
    decoder_outputs = self.decoder(
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/raid/ganesh/vishak/miniconda3/envs/perturbation-env/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1016, in forward
    raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
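If it helps narrow things down, here is a minimal sketch that, as far as I can tell, reproduces the same error outside the trainer: the traceback shows compute_loss calling the model with only input_ids and attention_mask, and doing that directly on a T5 seq2seq model raises the same ValueError because the decoder is given neither labels nor decoder_input_ids. I am using google-t5/t5-base below purely as a stand-in for my local summarization_policy_new checkpoint.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tok = AutoTokenizer.from_pretrained("google-t5/t5-base")   # stand-in for my local checkpoint
mdl = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")

batch = tok("summarize: some prompt text", return_tensors="pt")
# Forward pass with no labels and no decoder_input_ids, as in the traceback above
mdl(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
# -> ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds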