Hi,
I’m trying to fine-tune a pretrained TFGPT2LMHeadModel, but I get an error about the shape of the labels.
I tried passing the input ids of each sentence as the labels, first with shape (batch size, sentence length) and then one-hot encoded with shape (batch size, sentence length, vocab size), i.e. the same shape as the logits.
In the example below, sentence length = 14 and batch size = 2.
My code when the labels have shape (batch size, sentence length):
# Attempt 1: fine-tune GPT-2 using the raw token ids as the labels.
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2LMHeadModel.from_pretrained('gpt2')
# `list_of_sentenses` is defined outside this snippet; in this example it
# holds 2 sentences of 14 tokens each.
inputs = tokenizer(list_of_sentenses, return_tensors="tf",return_token_type_ids=True)
features = {'input_ids':inputs['input_ids'],'attention_mask':inputs['attention_mask'],'token_type_ids':inputs['token_type_ids']}
# For an LM head model, the labels are the input ids of the sentences.
labels = inputs['input_ids']
# NOTE(review): `tf` and `Dataset` are not imported in this snippet —
# presumably TensorFlow and `tf.data.Dataset`; confirm.
features_dataset = Dataset.from_tensor_slices(features)
labels_dataset = Dataset.from_tensor_slices(labels)
# Pair the feature dicts with the labels as (features, labels) elements.
dataset = Dataset.zip((features_dataset, labels_dataset))
# NOTE(review): no `.batch(...)` is applied to `dataset`, so `fit` presumably
# receives one sentence per element (the `(14,)` in the error) — confirm.
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
# NOTE(review): a single `loss` given to `compile` is presumably applied to
# every model output, not only the logits — TODO confirm whether the
# `(1792,)` shape comes from a non-logits output.
model.compile(optimizer=optimizer, loss=model.compute_loss)
model.fit(dataset)
ValueError: Shapes (1792,) and (14,) are incompatible
My code when the labels have shape (batch size, sentence length, vocab size):
# Attempt 2: fine-tune GPT-2 using one-hot encoded token ids as the labels.
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2LMHeadModel.from_pretrained('gpt2')
# `list_of_sentenses` is defined outside this snippet; in this example it
# holds 2 sentences of 14 tokens each.
inputs = tokenizer(list_of_sentenses, return_tensors="tf",return_token_type_ids=True)
features = {'input_ids':inputs['input_ids'],'attention_mask':inputs['attention_mask'],'token_type_ids':inputs['token_type_ids']}
# Here the labels are the input ids one-hot encoded with depth 50257
# (presumably the GPT-2 vocabulary size), with an extra axis inserted at
# position 2.
# NOTE(review): with the extra axis the labels are presumably
# (batch, seq, 1, vocab) rather than the intended (batch, seq, vocab) —
# confirm. The 703598 in the error equals 14 * 50257.
labels = tf.expand_dims(tf.one_hot(inputs['input_ids'], 50257),2)
# NOTE(review): `tf` and `Dataset` are not imported in this snippet —
# presumably TensorFlow and `tf.data.Dataset`; confirm.
features_dataset = Dataset.from_tensor_slices(features)
labels_dataset = Dataset.from_tensor_slices(labels)
# Pair the feature dicts with the labels as (features, labels) elements.
dataset = Dataset.zip((features_dataset, labels_dataset))
# NOTE(review): no `.batch(...)` is applied to `dataset`, so `fit` presumably
# receives one sentence per element — confirm.
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
# NOTE(review): `model.compute_loss` presumably expects integer class ids
# rather than one-hot vectors as labels — TODO confirm against the
# transformers documentation.
model.compile(optimizer=optimizer, loss=model.compute_loss)
model.fit(dataset)
ValueError: Shapes (14,) and (703598,) are incompatible
Thanks!