Based on "Finetune BLIP on customer dataset #20893" (post #2 by dxlong2000), I pass the `input_ids` as the `labels` when calling the model inside a custom `compute_loss`.
For example:
class CustomTrainer(Trainer):
    """Trainer whose loss comes from the model's own LM loss, with labels derived from ``input_ids``."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run a forward pass with labels built from ``input_ids`` and return the loss.

        Args:
            model: The model being trained; it is called with keyword arguments
                and must return an object exposing ``.loss``.
            inputs: Batch mapping that provides ``input_ids``, ``pixel_values``,
                ``attention_mask`` and ``image_embeds_position_mask``.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                just the loss.
            num_items_in_batch: Accepted for compatibility with Trainer versions
                that pass this argument to ``compute_loss``; unused here.

        Returns:
            ``outputs.loss``, or the tuple ``(outputs.loss, outputs)`` when
            ``return_outputs`` is true.
        """
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]

        # Labels are the input tokens themselves. Padding positions
        # (attention_mask == 0) are replaced with -100, the ignore index of the
        # cross-entropy loss, so padding does not contribute to the loss.
        # masked_fill returns a new tensor, leaving the batch's input_ids intact.
        labels = input_ids.masked_fill(attention_mask == 0, -100)

        # Forward only the fields named here rather than unpacking the whole batch.
        outputs = model(
            input_ids=input_ids,
            pixel_values=inputs["pixel_values"],
            attention_mask=attention_mask,
            image_embeds_position_mask=inputs["image_embeds_position_mask"],
            labels=labels,
        )

        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss