Hi community,
I am in the process of fine-tuning a pegasus using Keras with my custom dataset for abstractive text summarization task. But, it raised the errors of asking to specify either decoder_input_ids or decoder_inputs_embeds.
I think that I am having trouble to feed to Keras model the right format of data inputs. I am processing my input data without using data_collector function provided by HuggingFace on purpose. I have attached my code and errors msg below. If anyone could share any thoughts or feedback on this matter, I would really appreciate it. Thank you so much!
import pandas as pd
#from sklearn.model_selection import train_test_split
import numpy as np
import json
from typing import Dict, List, Tuple
from tqdm import tqdm
import tensorflow as tf
print(tf.__version__)
import transformers
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM
# PegasusTokenizer, TFPegasusForConditionalGeneration,
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_rows", None, "display.max_columns", None)
# Download model and tokenizer from Huggingface
model_id = "google/pegasus-xsum"
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Read in text-summary dataset from local
non_null_summary_df = pd.read_excel("baseline_data.xlsx")
phrase_list = non_null_summary_df['phrase'].apply(lambda x: str(x)).tolist()
summary_list = non_null_summary_df['summarization'].apply(lambda x: str(x)).tolist()
# Shuffle the dataset
shuffle_df = non_null_summary_df.sample(frac=1)
# Define a size for train set
train_size = int(0.7 * len(non_null_summary_df))
# Split the dataset
train_set = shuffle_df[:train_size]
test_set = shuffle_df[train_size:]
X_train = train_set['phrase'].apply(lambda x: str(x)).tolist()
y_train = train_set['summarization'].apply(lambda x: str(x)).tolist()
X_test = test_set['phrase'].apply(lambda x: str(x)).tolist()
y_test = test_set['summarization'].apply(lambda x: str(x)).tolist()
# Tokenized the text and summaries and return tf tensors
train_input_tf_dataset = tokenizer(X_train, max_length=1024, truncation=True, padding=True, return_tensors="tf")
train_output_tf_ts = tokenizer(y_train, max_length=1024, truncation=True, padding=True, return_tensors="tf").input_ids
eval_input_tf_dataset = tokenizer(X_test, max_length=1024, truncation=True, padding=True, return_tensors="tf")
eval_output_tf_ts = tokenizer(y_test, max_length=1024, truncation=True, padding=True, return_tensors="tf").input_ids
# Create train and eval features
train_features = {'input_ids': train_input_tf_dataset.input_ids,
'attention_mask': train_input_tf_dataset.attention_mask}
eval_features = {'input_ids': eval_input_tf_dataset.input_ids,
'attention_masks': eval_input_tf_dataset.attention_mask}
# Shuffle and batch the features
train_tf_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_output_tf_ts))
train_tf_dataset = train_tf_dataset.shuffle(len(X_train)).batch(8)
eval_tf_dataset = tf.data.Dataset.from_tensor_slices((eval_features, eval_output_tf_ts))
eval_tf_dataset = train_tf_dataset.shuffle(len(X_test)).batch(8)
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Train the model
model.fit(train_tf_dataset, validation_data=eval_tf_dataset, epochs=3)
Error msg:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [18], in <module>
----> 1 model.fit(train_tf_dataset, validation_data=eval_tf_dataset, epochs=3)
File ~/work/.local/lib/python3.8/site-packages/keras/engine/training.py:1184, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1177 with tf.profiler.experimental.Trace(
1178 'train',
1179 epoch_num=epoch,
1180 step_num=step,
1181 batch_size=batch_size,
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:885, in Function.__call__(self, *args, **kwds)
882 compiler = "xla" if self._jit_compile else "nonXla"
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
887 new_tracing_count = self.experimental_get_tracing_count()
888 without_tracing = (tracing_count == new_tracing_count)
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:933, in Function._call(self, *args, **kwds)
930 try:
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
936 # interestingly an exception was raised) so we no longer need a lock.
937 self._lock.release()
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:759, in Function._initialize(self, args, kwds, add_initializers_to)
756 self._lifted_initializer_graph = lifted_initializer_graph
757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
758 self._concrete_stateful_fn = (
--> 759 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
760 *args, **kwds))
762 def invalid_creator_scope(*unused_args, **unused_kwds):
763 """Disables variable creation."""
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3066, in Function._get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3064 args, kwargs = None, None
3065 with self._lock:
-> 3066 graph_function, _ = self._maybe_define_function(args, kwargs)
3067 return graph_function
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3463, in Function._maybe_define_function(self, args, kwargs)
3459 return self._define_function_with_shape_relaxation(
3460 args, kwargs, flat_args, filtered_flat_args, cache_key_context)
3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
3464 self._function_cache.primary[cache_key] = graph_function
3466 return graph_function, filtered_flat_args
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3298, in Function._create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3293 missing_arg_names = [
3294 "%s_%d" % (arg, i) for i, arg in enumerate(missing_arg_names)
3295 ]
3296 arg_names = base_arg_names + missing_arg_names
3297 graph_function = ConcreteFunction(
-> 3298 func_graph_module.func_graph_from_py_func(
3299 self._name,
3300 self._python_function,
3301 args,
3302 kwargs,
3303 self.input_signature,
3304 autograph=self._autograph,
3305 autograph_options=self._autograph_options,
3306 arg_names=arg_names,
3307 override_flat_arg_shapes=override_flat_arg_shapes,
3308 capture_by_value=self._capture_by_value),
3309 self._function_attributes,
3310 function_spec=self.function_spec,
3311 # Tell the ConcreteFunction to clean up its graph once it goes out of
3312 # scope. This is not the default behavior since it gets used in some
3313 # places (like Keras) where the FuncGraph lives longer than the
3314 # ConcreteFunction.
3315 shared_func_graph=False)
3316 return graph_function
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:1007, in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
1004 else:
1005 _, original_func = tf_decorator.unwrap(python_func)
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
1010 # TensorArrays and `None`s.
1011 func_outputs = nest.map_structure(convert, func_outputs,
1012 expand_composites=True)
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:668, in Function._defun_with_scope.<locals>.wrapped_fn(*args, **kwds)
664 with default_graph._variable_creator_scope(scope, priority=50): # pylint: disable=protected-access
665 # __wrapped__ allows AutoGraph to swap in a converted function. We give
666 # the function a weak reference to itself to avoid a reference cycle.
667 with OptionalXlaContext(compile_with_xla):
--> 668 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
669 return out
File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:994, in func_graph_from_py_func.<locals>.wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/home/jovyan/work/.local/lib/python3.8/site-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/pegasus/modeling_tf_pegasus.py:1433 call *
outputs = self.model(
/home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/pegasus/modeling_tf_pegasus.py:1190 call *
decoder_outputs = self.decoder(
/home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/pegasus/modeling_tf_pegasus.py:974 call *
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds