Paligemma model Forward Method Not Returning Loss in Trainer #31045

I was trying to fine-tune Google's new vision-language model (PaliGemma), and I keep getting the following error.

  • I was running the code on a Kaggle notebook (free T4) and also tried with a P100
  • Google Colab
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[14], line 1
----> 1 trainer.train()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1876, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1873 try:
   1874     # Disable progress bars when uploading models during checkpoints to avoid polluting stdout
   1875     hf_hub_utils.disable_progress_bars()
-> 1876     return inner_training_loop(
   1877         args=args,
   1878         resume_from_checkpoint=resume_from_checkpoint,
   1879         trial=trial,
   1880         ignore_keys_for_eval=ignore_keys_for_eval,
   1881     )
   1882 finally:
   1883     hf_hub_utils.enable_progress_bars()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2216, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2213     self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
   2215 with self.accelerator.accumulate(model):
-> 2216     tr_loss_step = self.training_step(model, inputs)
   2218 if (
   2219     args.logging_nan_inf_filter
   2220     and not is_torch_xla_available()
   2221     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
   2222 ):
   2223     # if loss is nan or inf simply add the average of previous logged losses
   2224     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:3238, in Trainer.training_step(self, model, inputs)
   3235     return loss_mb.reduce_mean().detach().to(self.args.device)
   3237 with self.compute_loss_context_manager():
-> 3238     loss = self.compute_loss(model, inputs)
   3240 del inputs
   3241 torch.cuda.empty_cache()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:3282, in Trainer.compute_loss(self, model, inputs, return_outputs)
   3280 else:
   3281     if isinstance(outputs, dict) and "loss" not in outputs:
-> 3282         raise ValueError(
   3283             "The model did not return a loss from the inputs, only the following keys: "
   3284             f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
   3285         )
   3286     # We don't use .loss here since the model may return tuples instead of ModelOutput.
   3287     loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]

ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask,pixel_values,labels.