# Token tensors aren't of the same length

from transformers import AutoTokenizer, VisionEncoderDecoderModel, ViTImageProcessor

# ViT encoder + GPT-2 decoder image-captioning checkpoint; reuse the one
# name for all three components so they are guaranteed to match.
model_name = "nlpconnect/vit-gpt2-image-captioning"
model = VisionEncoderDecoderModel.from_pretrained(model_name)
feature_extractor = ViTImageProcessor.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fixed target length for caption token tensors. This was referenced below
# but never defined. Padding every caption to this length (instead of
# padding=True, which only pads within a single tokenizer call) is what
# makes all label tensors the same shape.
model_max_length = 64

# GPT-2's tokenizer ships without a pad token; padding="max_length" raises
# unless one is set. Reusing EOS as the pad token is the standard choice.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Pairs raw images with tokenized captions for VisionEncoderDecoder training.

    Each item is a dict with:
      - 'pixel_values': the image preprocessed by the module-level
        ``feature_extractor`` (ViT resizes/normalizes to a fixed size itself).
      - 'labels': the caption token ids, padded/truncated to ``max_length``
        so every item has an identical shape and batches collate cleanly.
    """

    def __init__(self, images, captions, max_length=64):
        # images/captions are parallel sequences — assumes len(images) ==
        # len(captions); TODO confirm at the call site.
        self.images = images
        self.captions = captions
        # Every caption is padded/truncated to exactly this many tokens.
        self.max_length = max_length

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        caption = self.captions[idx]

        # ViTImageProcessor needs no max_length/padding arguments — those
        # are text-tokenizer options and were no-ops here.
        preprocessed_image = feature_extractor(
            image, return_tensors="pt"
        ).pixel_values

        # BUG FIX: padding=True only pads to the longest sequence in the
        # *same call*; since one caption is tokenized at a time, nothing was
        # ever padded, producing the varying torch.Size([1, N]) shapes.
        # padding="max_length" pads every caption to a fixed length.
        preprocessed_text = tokenizer(
            caption,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        ).input_ids

        inputs = {
            'pixel_values': preprocessed_image,
            'labels': preprocessed_text,
        }

        return inputs

# Sanity check: inspect the label tensor shape of the first few items —
# with fixed-length padding they should all be identical.
dataset = CustomDataset(fused_images, fused_images_phrases)
for sample_idx in range(10):
    sample = dataset[sample_idx]
    print(sample['labels'].shape)

'''
For this I am getting the output:

torch.Size([1, 6])
torch.Size([1, 7])
torch.Size([1, 4])
torch.Size([1, 4])
torch.Size([1, 8])
torch.Size([1, 8])
torch.Size([1, 6])
torch.Size([1, 6])
torch.Size([1, 6])
torch.Size([1, 6])

I don't know why my tensors aren't being padded to a uniform shape.
(Answer: padding=True pads only to the longest sequence within a single
tokenizer call; with one caption per call, no padding happens — use
padding="max_length" with a fixed max_length instead.)
'''