See
We just need the line
inputs["labels"] = inputs["input_ids"]
as in
(Note: that single line does not yet handle padding; the label positions that correspond to padding should be set to -100.)
from datasets import Dataset
# Encode images, texts and bounding boxes as one padded, truncated batch of
# PyTorch tensors and move the batch to `device`.
inputs = processor(
    images=train_df["image"].to_list(),
    text=train_df["text"].to_list(),
    bboxes=train_df["float_val"].to_list(),
    padding=True,
    truncation=True,
    return_tensors="pt",
).to(device)

# Use the input ids as the training labels. Clone them so that editing the
# labels in place never modifies `input_ids` itself.
labels = inputs["input_ids"].clone()

# Because padding=True pads shorter sequences, mask the padded positions with
# -100 (the default `ignore_index` of PyTorch's cross-entropy loss) so they do
# not contribute to the loss.
# NOTE(review): assumes the processor returns an `attention_mask` with the
# same shape as `input_ids`, where 0 marks padding -- confirm for this
# processor. If no mask is returned the labels are left unmasked.
if "attention_mask" in inputs:
    labels[inputs["attention_mask"] == 0] = -100
inputs["labels"] = labels

# Wrap the encoded batch in a `Dataset` and hold out 30% of the rows as the
# test split.
dataset = Dataset.from_dict(inputs)
train_test_split = dataset.train_test_split(test_size=0.3)