from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
# Run TrOCR text recognition on a single image and print the decoded text.

# Load the processor and the encoder-decoder model from the local TrOCR
# directory (path is relative to the current working directory).
processor = TrOCRProcessor.from_pretrained(r".\TrOCR")
model = VisionEncoderDecoderModel.from_pretrained(r".\TrOCR")

# Open the input picture and keep an RGB copy; the context manager closes the
# underlying file once the converted copy has been made.
with Image.open(r".\ld.png") as source_image:
    image = source_image.convert("RGB")

# Only the image and the tensor format are passed here: the previous
# ``attention_mask`` / ``Truncation`` keywords are text-tokenizer style options
# and are not image-preprocessing arguments.
pixel_values = processor(images=image, return_tensors="pt").pixel_values

# pad_token_id is set to the same id as eos_token_id for open-end generation.
# NOTE(review): max_new_tokens=10000 is kept from the original; it is likely
# far above what the decoder can emit -- confirm against the model config.
generated_ids = model.generate(
    pixel_values,
    pad_token_id=2,
    eos_token_id=2,
    max_new_tokens=10000,
)

# batch_decode returns one string per generated sequence; a single image was
# supplied, so take the first entry.
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_text)

# Result recorded by the original author for ld.png:
# OUTPUT: 'THIS IS I1 MEANS L & 1'