image = Image.open(test_df['Paths'][image_index]).convert("RGB")
image
pixel_values = processor.feature_extractor(image, return_tensors="pt").pixel_values
print("Pixel Values Shape:", pixel_values.shape)
generated_ids = model.generate(pixel_values)
print("Generated IDs:", generated_ids)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print("Generated Text:", generated_text)
I have the above code and it runs smoothly but there is no output when I try to print the generated text.
My model was trained as follows:
model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
"google/vit-base-patch16-224-in21k", "bert-base-chinese"
)
model.config.decoder.is_decoder = True
model.config.decoder.add_cross_attention = True
model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
model.config.pad_token_id = processor.tokenizer.pad_token_id
model.config.vocab_size = model.config.decoder.vocab_size
model.config.eos_token_id = processor.tokenizer.sep_token_id
model.config.max_length = 64
model.config.early_stopping = True
model.config.no_repeat_ngram_size = 3
model.config.length_penalty = 2.0
model.config.num_beams = 4
training_args = Seq2SeqTrainingArguments(
output_dir="./",
evaluation_strategy="steps",
learning_rate=1e-4, # Decreased learning rate
per_device_train_batch_size=10, # Decreased batch size
per_device_eval_batch_size=10, # Decreased batch size
weight_decay=0.01,
save_total_limit=3,
num_train_epochs=5, # Decreased number of epochs
logging_dir="./logs",
logging_steps=500,
save_steps=1000,
save_strategy="steps",
predict_with_generate=True,
max_steps=100,
gradient_accumulation_steps=2
)
cer_metric = load_metric("cer")
def compute_metrics(pred):
labels_ids = pred.label_ids
pred_ids = pred.predictions
pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
labels_ids[labels_ids == -100] = processor.tokenizer.pad_token_id
label_str = processor.batch_decode(labels_ids, skip_special_tokens=True)
cer = cer_metric.compute(predictions=pred_str, references=label_str)
return {"cer": cer}
os.environ["TOKENIZERS_PARALLELISM"] = "false"
optimizer = TorchAdamW(model.parameters(), lr=training_args.learning_rate)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=500,
num_training_steps=len(train_dataset) * training_args.num_train_epochs
)
# Create the trainer
trainer = Seq2SeqTrainer(
model=model,
tokenizer=processor.tokenizer,
args=training_args,
compute_metrics=compute_metrics,
train_dataset=train_dataset,
eval_dataset=test_dataset,
data_collator=default_data_collator,
optimizers=(optimizer, scheduler)
)
# Train the model
trainer.train()
Please let me know how I can debug this!