I am trying to fine-tune "distilgpt2" for text classification. I get the error below when trying to train the model (i.e., when calling `model.fit()`).
My system is using:
- Keras 2.14.0
- TensorFlow 2.14.1
- Transformers 4.39.dev0
Could someone provide feedback? Thanks.
TypeError: in user code:
File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 1146, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.9/site-packages/keras/engine/training.py", line 1135, in run_step **
outputs = model.train_step(data)
File "/home/nbucar1/.local/lib/python3.9/site-packages/transformers/modeling_tf_utils.py", line 1637, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_filemwik0t34.py", line 37, in tf__run_call_with_unpacked_inputs
retval_ = ag__.converted_call(ag__.ld(func), (ag__.ld(self),), dict(**ag__.ld(unpacked_inputs)), fscope)
File "/tmp/__autograph_generated_filebbszqrkw.py", line 56, in tf__call
ag__.if_stmt(ag__.ld(self).config.pad_token_id is None, if_body_1, else_body_1, get_state_1, set_state_1, ('in_logits', 'sequence_lengths'), 2)
File "/tmp/__autograph_generated_filebbszqrkw.py", line 54, in else_body_1
ag__.if_stmt(ag__.ld(input_ids) is not None, if_body, else_body, get_state, set_state, ('in_logits', 'sequence_lengths'), 2)
File "/tmp/__autograph_generated_filebbszqrkw.py", line 46, in if_body
sequence_lengths = ag__.converted_call(ag__.ld(tf).where, (ag__.ld(sequence_lengths) >= 0, ag__.ld(sequence_lengths), ag__.ld(input_ids).shape[-1] - 1), None, fscope)
TypeError: Exception encountered when calling layer "tfgpt2_for_sequence_classification" " f"(type TFGPT2ForSequenceClassification).
in user code:
File "/home/nbucar1/.local/lib/python3.9/site-packages/transformers/modeling_tf_utils.py", line 1163, in run_call_with_unpacked_inputs *
return func(self, **unpacked_inputs)
File "/home/nbucar1/.local/lib/python3.9/site-packages/transformers/models/gpt2/modeling_tf_gpt2.py", line 1201, in call *
sequence_lengths = tf.where(sequence_lengths >= 0, sequence_lengths, input_ids.shape[-1] - 1)
TypeError: unsupported operand type(s) for -: 'NoneType' and 'int'
Call arguments received by layer "tfgpt2_for_sequence_classification" " f"(type TFGPT2ForSequenceClassification):
• self={'input_ids': 'tf.Tensor(shape=(None, None), dtype=int64)', 'attention_mask': 'tf.Tensor(shape=(None, None), dtype=int64)', 'labels': 'tf.Tensor(shape=(None,), dtype=int64)'}
• input_ids=None
• past_key_values=None
• attention_mask=None
• token_type_ids=None
• position_ids=None
• head_mask=None
• inputs_embeds=None
• use_cache=None
• output_attentions=None
• output_hidden_states=None
• return_dict=None
• labels=None
• training=True