I have tried several ways of creating the dataset and feeding it to the model, but I get the same error every time. I am new to Hugging Face, so can anyone please explain how to train a TFAutoModelForSeq2SeqLM model?
In my dataset, both the Input and Output columns are sentences.
I want a model that mimics a person.
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import tensorflow as tf
# Load model directly
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = TFAutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
from datasets import load_dataset

# conversation_data.csv has two columns, "Input" and "Output"
dataset = load_dataset("csv", data_files="/content/conversation_data.csv", sep=",", split="train")
max_input_length = 1024
max_target_length = 128
def preprocess_function(examples):
    model_inputs = tokenizer(examples["Input"], max_length=max_input_length, truncation=True)
    # Set up the tokenizer for targets
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["Output"], max_length=max_target_length, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(
    preprocess_function, batched=True, remove_columns=["Input", "Output"]
)
# Dataset({
#     features: ['input_ids', 'attention_mask', 'labels'],
#     num_rows: 2304
# })
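A quick peek at one tokenized example confirms the columns are there (the values in the comments are what I expect to see, not verified output):

print(tokenized_datasets[0].keys())         # dict_keys(['input_ids', 'attention_mask', 'labels'])
print(tokenized_datasets[0]["labels"][:5])  # first few token ids of the target sentence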
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
tf_train_dataset = tokenized_datasets.to_tf_dataset(
    columns=["attention_mask", "input_ids"],
    label_cols=["labels"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=8,
)
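As a sanity check of what Keras will actually feed to the model, I also wrote this (my own addition; the expected outputs in the comments are assumptions):

# With label_cols set, to_tf_dataset yields (inputs, labels) pairs, so "labels"
# goes to Keras as y and is not part of the dict passed to the model's call()
for batch_inputs, batch_labels in tf_train_dataset.take(1):
    print(batch_inputs.keys())  # expected: dict_keys(['attention_mask', 'input_ids'])
    print(batch_labels.shape)   # expected: (8, longest_label_in_batch)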
# Compile and train the model
model.compile(optimizer=Adam(learning_rate=3e-5), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.fit(tf_train_dataset)

model.fit then fails with:
ValueError: Exception encountered when calling layer 'model' (type TFBlenderbotMainLayer).
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
Call arguments received by layer 'decoder' (type TFBlenderbotDecoder):
• input_ids=None
• inputs_embeds=None
• attention_mask=None
• position_ids=None
• encoder_hidden_states=tf.Tensor(shape=(None, None, 1280), dtype=float32)
• encoder_attention_mask=tf.Tensor(shape=(None, None), dtype=int32)
• head_mask=None
• cross_attn_head_mask=None
• past_key_values=None
• use_cache=True
• output_attentions=False
• output_hidden_states=False
• return_dict=True
• training=True
Call arguments received by layer 'model' (type TFBlenderbotMainLayer):
• input_ids=tf.Tensor(shape=(None, None), dtype=int32)
• attention_mask=tf.Tensor(shape=(None, None), dtype=int32)
• decoder_input_ids=None
• decoder_attention_mask=None
• decoder_position_ids=None
• head_mask=None
• decoder_head_mask=None
• cross_attn_head_mask=None
• encoder_outputs=None
• past_key_values=None
• inputs_embeds=None
• decoder_inputs_embeds=None
• use_cache=True
• output_attentions=False
• output_hidden_states=False
• return_dict=True
• training=True
• kwargs=<class 'inspect._empty'>
Call arguments received by layer 'tf_blenderbot_for_conditional_generation' (type TFBlenderbotForConditionalGeneration):
• input_ids={'input_ids': 'tf.Tensor(shape=(None, None), dtype=int64)', 'attention_mask': 'tf.Tensor(shape=(None, None), dtype=int64)'}
• attention_mask=None
• decoder_input_ids=None
• decoder_attention_mask=None
• decoder_position_ids=None
• head_mask=None
• decoder_head_mask=None
• cross_attn_head_mask=None
• encoder_outputs=None
• past_key_values=None
• inputs_embeds=None
• decoder_inputs_embeds=None
• use_cache=None
• output_attentions=None
• output_hidden_states=None
• return_dict=None
• labels=None
• training=True
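From reading the traceback, my guess is that Keras splits each batch into (x, y), so the model's forward call only receives input_ids and attention_mask; since the labels never reach the model, it cannot shift them into decoder_input_ids, hence the error. Below is the workaround I am planning to try next, based on the TF examples in the Transformers docs: DataCollatorForSeq2Seq together with model.prepare_tf_dataset, compiling without an external loss so the model's internal seq2seq loss is used. Is this the right approach?

# Untested sketch based on the Transformers TF examples; corrections welcome
from transformers import DataCollatorForSeq2Seq

# Unlike DataCollatorWithPadding, this collator also pads the "labels" column
# (with -100, so padding positions are ignored by the loss)
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, return_tensors="tf")

# prepare_tf_dataset keeps "labels" inside the input dict, so the model can
# derive decoder_input_ids by shifting the labels itself
tf_train_dataset = model.prepare_tf_dataset(
    tokenized_datasets,
    shuffle=True,
    batch_size=8,
    collate_fn=data_collator,
)

# No loss argument: the model computes its own loss from the labels
model.compile(optimizer=Adam(learning_rate=3e-5))
model.fit(tf_train_dataset)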