Errors when fine-tuning using Keras

Hi community,

I am in the process of fine-tuning a pegasus using Keras with my custom dataset for abstractive text summarization task. But, it raised the errors of asking to specify either decoder_input_ids or decoder_inputs_embeds.

I think that I am having trouble to feed to Keras model the right format of data inputs. I am processing my input data without using data_collector function provided by HuggingFace on purpose. I have attached my code and errors msg below. If anyone could share any thoughts or feedback on this matter, I would really appreciate it. Thank you so much!

import pandas as pd
#from sklearn.model_selection import train_test_split
import numpy as np
import json
from typing import Dict, List, Tuple
from tqdm import tqdm

import tensorflow as tf
print(tf.__version__)

import transformers
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
# PegasusTokenizer, TFPegasusForConditionalGeneration,

import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_rows", None, "display.max_columns", None)

# Download model and tokenizer from Huggingface
model_id = "google/pegasus-xsum"
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Read in text-summary dataset from local
non_null_summary_df = pd.read_excel("baseline_data.xlsx")
phrase_list = non_null_summary_df['phrase'].apply(lambda x: str(x)).tolist()
summary_list = non_null_summary_df['summarization'].apply(lambda x: str(x)).tolist()

# Shuffle the dataset 
shuffle_df = non_null_summary_df.sample(frac=1)

# Define a size for train set 
train_size = int(0.7 * len(non_null_summary_df))

# Split the dataset 
train_set = shuffle_df[:train_size]
test_set = shuffle_df[train_size:]

X_train = train_set['phrase'].apply(lambda x: str(x)).tolist()
y_train = train_set['summarization'].apply(lambda x: str(x)).tolist()

X_test = test_set['phrase'].apply(lambda x: str(x)).tolist()
y_test = test_set['summarization'].apply(lambda x: str(x)).tolist()

# Tokenized the text and summaries and return tf tensors
train_input_tf_dataset = tokenizer(X_train, max_length=1024, truncation=True, padding=True, return_tensors="tf")
train_output_tf_ts = tokenizer(y_train, max_length=1024, truncation=True, padding=True, return_tensors="tf").input_ids

eval_input_tf_dataset = tokenizer(X_test, max_length=1024, truncation=True, padding=True, return_tensors="tf")
eval_output_tf_ts = tokenizer(y_test, max_length=1024, truncation=True, padding=True, return_tensors="tf").input_ids

# Create train and eval features
train_features = {'input_ids': train_input_tf_dataset.input_ids,
                  'attention_mask': train_input_tf_dataset.attention_mask}

eval_features = {'input_ids': eval_input_tf_dataset.input_ids,
                 'attention_masks': eval_input_tf_dataset.attention_mask}

# Shuffle and batch the features
train_tf_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_output_tf_ts))
train_tf_dataset = train_tf_dataset.shuffle(len(X_train)).batch(8)

eval_tf_dataset = tf.data.Dataset.from_tensor_slices((eval_features, eval_output_tf_ts))
eval_tf_dataset = train_tf_dataset.shuffle(len(X_test)).batch(8)

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the model
model.fit(train_tf_dataset, validation_data=eval_tf_dataset, epochs=3)

Error msg:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [18], in <module>
----> 1 model.fit(train_tf_dataset, validation_data=eval_tf_dataset, epochs=3)

File ~/work/.local/lib/python3.8/site-packages/keras/engine/training.py:1184, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1177 with tf.profiler.experimental.Trace(
   1178     'train',
   1179     epoch_num=epoch,
   1180     step_num=step,
   1181     batch_size=batch_size,
   1182     _r=1):
   1183   callbacks.on_train_batch_begin(step)
-> 1184   tmp_logs = self.train_function(iterator)
   1185   if data_handler.should_sync:
   1186     context.async_wait()

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:885, in Function.__call__(self, *args, **kwds)
    882 compiler = "xla" if self._jit_compile else "nonXla"
    884 with OptionalXlaContext(self._jit_compile):
--> 885   result = self._call(*args, **kwds)
    887 new_tracing_count = self.experimental_get_tracing_count()
    888 without_tracing = (tracing_count == new_tracing_count)

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:933, in Function._call(self, *args, **kwds)
    930 try:
    931   # This is the first call of __call__, so we have to initialize.
    932   initializers = []
--> 933   self._initialize(args, kwds, add_initializers_to=initializers)
    934 finally:
    935   # At this point we know that the initialization is complete (or less
    936   # interestingly an exception was raised) so we no longer need a lock.
    937   self._lock.release()

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:759, in Function._initialize(self, args, kwds, add_initializers_to)
    756 self._lifted_initializer_graph = lifted_initializer_graph
    757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
    758 self._concrete_stateful_fn = (
--> 759     self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
    760         *args, **kwds))
    762 def invalid_creator_scope(*unused_args, **unused_kwds):
    763   """Disables variable creation."""

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3066, in Function._get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   3064   args, kwargs = None, None
   3065 with self._lock:
-> 3066   graph_function, _ = self._maybe_define_function(args, kwargs)
   3067 return graph_function

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3463, in Function._maybe_define_function(self, args, kwargs)
   3459   return self._define_function_with_shape_relaxation(
   3460       args, kwargs, flat_args, filtered_flat_args, cache_key_context)
   3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
   3464 self._function_cache.primary[cache_key] = graph_function
   3466 return graph_function, filtered_flat_args

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3298, in Function._create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   3293 missing_arg_names = [
   3294     "%s_%d" % (arg, i) for i, arg in enumerate(missing_arg_names)
   3295 ]
   3296 arg_names = base_arg_names + missing_arg_names
   3297 graph_function = ConcreteFunction(
-> 3298     func_graph_module.func_graph_from_py_func(
   3299         self._name,
   3300         self._python_function,
   3301         args,
   3302         kwargs,
   3303         self.input_signature,
   3304         autograph=self._autograph,
   3305         autograph_options=self._autograph_options,
   3306         arg_names=arg_names,
   3307         override_flat_arg_shapes=override_flat_arg_shapes,
   3308         capture_by_value=self._capture_by_value),
   3309     self._function_attributes,
   3310     function_spec=self.function_spec,
   3311     # Tell the ConcreteFunction to clean up its graph once it goes out of
   3312     # scope. This is not the default behavior since it gets used in some
   3313     # places (like Keras) where the FuncGraph lives longer than the
   3314     # ConcreteFunction.
   3315     shared_func_graph=False)
   3316 return graph_function

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:1007, in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
   1004 else:
   1005   _, original_func = tf_decorator.unwrap(python_func)
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
   1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
   1010 # TensorArrays and `None`s.
   1011 func_outputs = nest.map_structure(convert, func_outputs,
   1012                                   expand_composites=True)

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:668, in Function._defun_with_scope.<locals>.wrapped_fn(*args, **kwds)
    664 with default_graph._variable_creator_scope(scope, priority=50):  # pylint: disable=protected-access
    665   # __wrapped__ allows AutoGraph to swap in a converted function. We give
    666   # the function a weak reference to itself to avoid a reference cycle.
    667   with OptionalXlaContext(compile_with_xla):
--> 668     out = weak_wrapped_fn().__wrapped__(*args, **kwds)
    669   return out

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:994, in func_graph_from_py_func.<locals>.wrapper(*args, **kwargs)
    992 except Exception as e:  # pylint:disable=broad-except
    993   if hasattr(e, "ag_error_metadata"):
--> 994     raise e.ag_error_metadata.to_exception(e)
    995   else:
    996     raise

ValueError: in user code:

    /home/jovyan/work/.local/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/pegasus/modeling_tf_pegasus.py:1433 call  *
        outputs = self.model(
    /home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/pegasus/modeling_tf_pegasus.py:1190 call  *
        decoder_outputs = self.decoder(
    /home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/pegasus/modeling_tf_pegasus.py:974 call  *
        raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")

    ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds