Hi, I have a related issue I am trying to train the TrOCR model on my own data what I tried:
# the class for loading data has function :
def __getitem__(self, idx):
file_name = self.df['file_name'][idx]
text = self.df['text'][idx]
# prepare image (i.e. resize + normalize)
image = Image.open(self.root_dir + file_name).convert("RGB")
pixel_values = self.processor(image, return_tensors="pt").pixel_values
labels = self.processor.tokenizer(text, padding="max_length",
max_length=self.max_target_length).input_ids
labels = [label if label != self.processor.tokenizer.pad_token_id else -100 for label in
labels]
encoding = {"pixel_values": pixel_values.squeeze(), "labels": torch.tensor(labels)}
return encoding
training_args = Seq2SeqTrainingArguments(
num_train_epochs=25,
learning_rate=5e-5,
predict_with_generate=True,
evaluation_strategy="steps",
per_device_train_batch_size=64,
per_device_eval_batch_size=64,
fp16=True,
output_dir="/1/large/",
logging_steps=100,
save_steps=2000,
eval_steps=5000,
)
trainer = Seq2SeqTrainer( model=model, tokenizer=processor.feature_extractor,
args=training_args, compute_metrics=compute_metrics, train_dataset=train_dataset,
eval_dataset=eval_dataset, data_collator=default_data_collator, )
the feed input image to the model has a height of 64
fixed for all and different width
The issue I see is: where the training stops after a few hours
Traceback (most recent call last):
File "train.py", line 191, in <module>
main()
File "train.py", line 173, in main
trainer.train()
File "/home/user/venv/lib/python3.8/site-packages/transformers/trainer.py", line 1521, in train
return inner_training_loop(
File "/home/user/venv/lib/python3.8/site-packages/transformers/trainer.py", line 1737, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/home/user/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
data = self._next_data()
File "/home/user/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 475, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/home/user/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
return self.collate_fn(data)
File "/home/user/venv/lib/python3.8/site-packages/transformers/trainer_utils.py", line 696, in __call__
return self.data_collator(features)
File "/home/user/venv/lib/python3.8/site-packages/transformers/data/data_collator.py", line 67, in default_data_collator
return torch_default_data_collator(features)
File "/home/user/venv/lib/python3.8/site-packages/transformers/data/data_collator.py", line 129, in torch_default_data_collator
batch[k] = torch.stack([f[k] for f in features])
RuntimeError: stack expects each tensor to be equal size, but got [128] at entry 0 and [139] at entry 19
1%|▊ | 1356/166025 [40:59<82:58:15, 1.81s/it]
Transformer version: 4.22.2
@sgugger
@NielsRogge