Errors while fine-tuning using Keras

Hello there,

I am trying to fine-tune a BERT model on the pre-downloaded “imdb” dataset, following exactly the code from the example Fine-tuning a pretrained model.

But I get a ValueError when I call model.fit().

I would greatly appreciate any suggestions or comments on this problem. Please let me know!

My Code:

import tensorflow as tf
import datasets

# Load the pre-downloaded IMDB dataset from disk
raw_dataset = datasets.load_from_disk('./train')

from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

tf_train_dataset = small_train_dataset.remove_columns(["text"]).with_format("tensorflow")
tf_eval_dataset = small_eval_dataset.remove_columns(["text"]).with_format("tensorflow")

train_features = {x: tf_train_dataset[x] for x in tokenizer.model_input_names}
eval_features = {x: tf_eval_dataset[x] for x in tokenizer.model_input_names}

train_tf_dataset = tf.data.Dataset.from_tensor_slices((train_features, tf_train_dataset["label"]))
train_tf_dataset = train_tf_dataset.shuffle(len(tf_train_dataset)).batch(8)

eval_tf_dataset = tf.data.Dataset.from_tensor_slices((eval_features, tf_eval_dataset["label"]))
eval_tf_dataset = eval_tf_dataset.batch(8)  # batch the eval set (not train_tf_dataset again)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=tf.metrics.SparseCategoricalAccuracy(),
)

model.fit(train_tf_dataset, validation_data=eval_tf_dataset, epochs=3)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [16], in <module>
      1 model.compile(
      2     optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
      3     loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      4     metrics=tf.metrics.SparseCategoricalAccuracy(),
      5 )
----> 7 model.fit(train_tf_dataset, validation_data=eval_tf_dataset, epochs=3)

File ~/work/.local/lib/python3.8/site-packages/keras/engine/training.py:1184, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1177 with tf.profiler.experimental.Trace(
   1178     'train',
   1179     epoch_num=epoch,
   1180     step_num=step,
   1181     batch_size=batch_size,
   1182     _r=1):
   1183   callbacks.on_train_batch_begin(step)
-> 1184   tmp_logs = self.train_function(iterator)
   1185   if data_handler.should_sync:
   1186     context.async_wait()

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:885, in Function.__call__(self, *args, **kwds)
    882 compiler = "xla" if self._jit_compile else "nonXla"
    884 with OptionalXlaContext(self._jit_compile):
--> 885   result = self._call(*args, **kwds)
    887 new_tracing_count = self.experimental_get_tracing_count()
    888 without_tracing = (tracing_count == new_tracing_count)

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:933, in Function._call(self, *args, **kwds)
    930 try:
    931   # This is the first call of __call__, so we have to initialize.
    932   initializers = []
--> 933   self._initialize(args, kwds, add_initializers_to=initializers)
    934 finally:
    935   # At this point we know that the initialization is complete (or less
    936   # interestingly an exception was raised) so we no longer need a lock.
    937   self._lock.release()

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:759, in Function._initialize(self, args, kwds, add_initializers_to)
    756 self._lifted_initializer_graph = lifted_initializer_graph
    757 self._graph_deleter = FunctionDeleter(self._lifted_initializer_graph)
    758 self._concrete_stateful_fn = (
--> 759     self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
    760         *args, **kwds))
    762 def invalid_creator_scope(*unused_args, **unused_kwds):
    763   """Disables variable creation."""

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3066, in Function._get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   3064   args, kwargs = None, None
   3065 with self._lock:
-> 3066   graph_function, _ = self._maybe_define_function(args, kwargs)
   3067 return graph_function

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3463, in Function._maybe_define_function(self, args, kwargs)
   3459   return self._define_function_with_shape_relaxation(
   3460       args, kwargs, flat_args, filtered_flat_args, cache_key_context)
   3462 self._function_cache.missed.add(call_context_key)
-> 3463 graph_function = self._create_graph_function(args, kwargs)
   3464 self._function_cache.primary[cache_key] = graph_function
   3466 return graph_function, filtered_flat_args

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py:3298, in Function._create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   3293 missing_arg_names = [
   3294     "%s_%d" % (arg, i) for i, arg in enumerate(missing_arg_names)
   3295 ]
   3296 arg_names = base_arg_names + missing_arg_names
   3297 graph_function = ConcreteFunction(
-> 3298     func_graph_module.func_graph_from_py_func(
   3299         self._name,
   3300         self._python_function,
   3301         args,
   3302         kwargs,
   3303         self.input_signature,
   3304         autograph=self._autograph,
   3305         autograph_options=self._autograph_options,
   3306         arg_names=arg_names,
   3307         override_flat_arg_shapes=override_flat_arg_shapes,
   3308         capture_by_value=self._capture_by_value),
   3309     self._function_attributes,
   3310     function_spec=self.function_spec,
   3311     # Tell the ConcreteFunction to clean up its graph once it goes out of
   3312     # scope. This is not the default behavior since it gets used in some
   3313     # places (like Keras) where the FuncGraph lives longer than the
   3314     # ConcreteFunction.
   3315     shared_func_graph=False)
   3316 return graph_function

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:1007, in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
   1004 else:
   1005   _, original_func = tf_decorator.unwrap(python_func)
-> 1007 func_outputs = python_func(*func_args, **func_kwargs)
   1009 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
   1010 # TensorArrays and `None`s.
   1011 func_outputs = nest.map_structure(convert, func_outputs,
   1012                                   expand_composites=True)

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py:668, in Function._defun_with_scope.<locals>.wrapped_fn(*args, **kwds)
    664 with default_graph._variable_creator_scope(scope, priority=50):  # pylint: disable=protected-access
    665   # __wrapped__ allows AutoGraph to swap in a converted function. We give
    666   # the function a weak reference to itself to avoid a reference cycle.
    667   with OptionalXlaContext(compile_with_xla):
--> 668     out = weak_wrapped_fn().__wrapped__(*args, **kwds)
    669   return out

File ~/work/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py:994, in func_graph_from_py_func.<locals>.wrapper(*args, **kwargs)
    992 except Exception as e:  # pylint:disable=broad-except
    993   if hasattr(e, "ag_error_metadata"):
--> 994     raise e.ag_error_metadata.to_exception(e)
    995   else:
    996     raise

ValueError: in user code:

    /home/jovyan/work/.local/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/bert/modeling_tf_bert.py:1421 call  *
        outputs = self.bert(
    /home/jovyan/work/.local/lib/python3.8/site-packages/transformers/models/bert/modeling_tf_bert.py:646 call  *
        raise ValueError("You have to specify either input_ids or inputs_embeds")

    ValueError: You have to specify either input_ids or inputs_embeds

Note that this example has recently been updated (see the master doc).
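
For reference, the updated example builds the tf.data.Dataset with Dataset.to_tf_dataset and a data collator instead of from_tensor_slices. A minimal sketch of that approach, reusing the tokenizer and the tokenized splits from the code above (the exact column names are an assumption based on what a BERT tokenizer produces):

from transformers import DataCollatorWithPadding

# Pads each batch dynamically and returns TensorFlow tensors;
# it also renames the "label" column to "labels"
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

# to_tf_dataset keeps the model inputs and the labels together,
# so input_ids reaches the model intact
train_tf_dataset = small_train_dataset.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],
    label_cols=["labels"],
    shuffle=True,
    batch_size=8,
    collate_fn=data_collator,
)
eval_tf_dataset = small_eval_dataset.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],
    label_cols=["labels"],
    shuffle=False,
    batch_size=8,
    collate_fn=data_collator,
)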

Thank you! But I am still wondering: did I do anything wrong in my current code? Why would it raise a ValueError? If you could share any thoughts, that would be really helpful!
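
The error itself says the batches reach the model without an input_ids key, so one way to narrow this down before calling model.fit() is to check what the tf.data.Dataset actually yields. A small diagnostic sketch, using the variables from the code above:

# The keys of the features dict should match the model's expected inputs
print(tokenizer.model_input_names)
print(train_tf_dataset.element_spec)

# Pull one batch eagerly and confirm input_ids is present with a sane shape
features, labels = next(iter(train_tf_dataset))
print(features.keys(), labels.shape)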