ValueError: Unable to create tensor, activate padding with 'padding=True'

ojdeboer · November 13, 2024, 9:00am

I’m working on training a model for image segmentation using Mask2Former, but I’m encountering an issue when using AutoImageProcessor. I’ve included my code and environment details below. My input images and masks are RGB and 8-bit images, respectively, each 512×512 pixels. Adding `padding=True` to the processor call does not solve the problem.

My code:

‘’’
import os
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation, Trainer, TrainingArguments
import torch
from PIL import Image
import numpy as np

# Directory passed to TrainingArguments as output_dir further down the script.
local_save_path = "./saved_models/m2f/"
# Hub identifier of the pretrained checkpoint to fine-tune.
model_name = "facebook/mask2former-swin-large-coco-instance"

# Load the model and its image processor from the same checkpoint name.
model = Mask2FormerForUniversalSegmentation.from_pretrained(model_name)
processor = AutoImageProcessor.from_pretrained(model_name)

class SegmentationDataset:
    """Dataset of image / segmentation-mask pairs run through an image processor.

    Images and masks are paired by position in the alphabetically sorted
    listings of ``image_dir`` and ``mask_dir``, so both directories are
    expected to hold matching files in the same order.
    """

    def __init__(self, image_dir, mask_dir, processor, image_size=(512, 512)):
        """Record the directories, sorted file listings and preprocessing settings.

        Args:
            image_dir: Directory containing the input images.
            mask_dir: Directory containing the segmentation masks.
            processor: Callable invoked as
                ``processor(images=..., segmentation_maps=..., return_tensors="pt")``.
            image_size: ``(width, height)`` every image and mask is resized to.
        """
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.images = sorted(os.listdir(image_dir))
        self.masks = sorted(os.listdir(mask_dir))
        self.processor = processor
        self.image_size = image_size

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and preprocess the image/mask pair at position ``idx``.

        Returns:
            Whatever ``self.processor`` returns for the single image and mask.
        """
        image_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        image = Image.open(image_path).convert("RGB").resize(self.image_size)
        # Masks hold discrete label values: nearest-neighbour resampling keeps
        # them intact, whereas Pillow's default (bicubic) filter would blend
        # neighbouring labels into values that correspond to no class.
        mask = (
            Image.open(mask_path)
            .convert("L")
            .resize(self.image_size, resample=Image.NEAREST)
        )

        image = np.array(image, dtype=np.float32)
        mask = np.array(mask, dtype=np.float32)

        print(f"Image shape: {image.shape}")
        print(f"Mask shape: {mask.shape}")

        encoded_inputs = self.processor(
            images=image,
            segmentation_maps=mask,
            return_tensors="pt"
        )

        print(f"Image shape after processing: {encoded_inputs['pixel_values'].shape}")
        # The Mask2Former image processor stores targets under "mask_labels"
        # (one tensor per image) and "class_labels"; it returns no "labels" key.
        print(f"Mask shape after processing: {encoded_inputs['mask_labels'][0].shape}")

        return encoded_inputs

# Initialize datasets

train_dataset = SegmentationDataset(
    image_dir="/mnt/data1/pancreas/exp3/debug_dataset/images/train",
    mask_dir="/mnt/data1/pancreas/exp3/debug_dataset/masks/train",
    processor=processor,
)

val_dataset = SegmentationDataset(
    image_dir="/mnt/data1/pancreas/exp3/debug_dataset/images/val",
    mask_dir="/mnt/data1/pancreas/exp3/debug_dataset/masks/val",
    processor=processor,
)

# Training arguments

training_args = TrainingArguments(
    output_dir=local_save_path,
    eval_strategy="epoch",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=10,
    save_steps=1000,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=100,
    dataloader_drop_last=True,  # Drops last incomplete batch if present
)

# Initialize Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model

trainer.train()
‘’’

And the output/error message:

/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/utils/deprecation.py:165: UserWarning: The following named arguments are not valid for Mask2FormerImageProcessor.__init__ and were ignored: ‘_max_size’
return func(*args, **kwargs)
0%| | 0/2500 [00:00<?, ?it/s]Image shape: (512, 512, 3)
Mask shape: (512, 512)
Traceback (most recent call last):
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 193, in convert_to_tensors
tensor = as_tensor(value)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 150, in as_tensor
return torch.from_numpy(value)
TypeError: expected np.ndarray (got numpy.ndarray)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File “/mnt/home/pycharm/huggingface/train.py”, line 88, in
trainer.train()
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/trainer.py”, line 2123, in train
return inner_training_loop(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/trainer.py”, line 2427, in _inner_training_loop
batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/trainer.py”, line 5045, in get_batch_samples
batch_samples += [next(epoch_iterator)]
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/accelerate/data_loader.py”, line 552, in iter
current_batch = next(dataloader_iter)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/dataloader.py”, line 701, in next
data = self._next_data()
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/dataloader.py”, line 757, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py”, line 52, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py”, line 52, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File “/mnt/home/pycharm/huggingface/train.py”, line 40, in getitem
encoded_inputs = self.processor(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 573, in call
return self.preprocess(images, segmentation_maps=segmentation_maps, **kwargs)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/utils/deprecation.py”, line 165, in wrapped_func
return func(*args, **kwargs)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/utils/generic.py”, line 852, in wrapper
return func(*args, **valid_kwargs)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 768, in preprocess
encoded_inputs = self.encode_inputs(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 931, in encode_inputs
encoded_inputs = self.pad(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 861, in pad
return BatchFeature(data=data, tensor_type=return_tensors)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 79, in init
self.convert_to_tensors(tensor_type=tensor_type)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 199, in convert_to_tensors
raise ValueError(
ValueError: Unable to create tensor, you should probably activate padding with ‘padding=True’ to have batched tensors with the same length.
0%| | 0/2500 [00:00<?, ?it/s]

Process finished with exit code 1

My transformers environment:

transformers version: 4.46.2
Platform: Linux-6.8.0-48-generic-x86_64-with-glibc2.35
Python version: 3.10.15
Huggingface_hub version: 0.24.6
Safetensors version: 0.4.5
Accelerate version: 1.1.1
Accelerate config: not found
PyTorch version (GPU?): 2.5.1 (True)
Tensorflow version (GPU?): not installed (NA)
Flax version (CPU?/GPU?/TPU?): not installed (NA)
Jax version: not installed
JaxLib version: not installed
Using distributed or parallel set-up in script?:
Using GPU in script?:
GPU type: NVIDIA GeForce RTX 3090

I suspect this issue might be due to the shape of the processed data or the need for padding. Should I modify the processor or handle padding differently to enable batched tensors? Thank you very much in advance for any advice!

John6666 · November 13, 2024, 11:55am

There is a known bug in the current Transformers image processor. It is still being investigated, so there is no reliable workaround yet.
If your error is caused by that bug, the following workaround may avoid it:
do as much of the preprocessing as possible in your own code and pass the images to the processor as PIL.Image.Image objects instead of NumPy arrays.

    #image = np.array(image,dtype=np.float32)
    #mask = np.array(mask, dtype=np.float32)

    #print(f"Image shape: {image.shape}")
    #print(f"Mask shape: {mask.shape}")

    encoded_inputs = self.processor(
        images=[image],
        segmentation_maps=[mask], # Maybe I shouldn't make a list. I don't know.
        return_tensors="pt"
    )

ojdeboer · November 13, 2024, 2:03pm

Thank you John,
Problem is solved, it had to do with an old Numpy version…

John6666 · November 13, 2024, 2:04pm

Oh… numpy<2 problem…

Topic		Replies	Views
Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length Beginners	1	59	November 6, 2024
Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length. (Paligemma) 🤗Transformers	2	733	July 3, 2024
ValueError: Unable to create tensor, you should probably activate truncation and/or padding with ‘padding=True’ ‘truncation=True’ 🤗Transformers	1	645	November 22, 2023
How do you use segmentation image processor with more than 3 channel images? Beginners	1	241	May 13, 2024
Wav2Vec2 - ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length 🤗Transformers	1	414	November 27, 2023

ValueError: Unable to create tensor, activate padding with 'padding=True'

Initialize datasets

Training arguments

Initialize Trainer

Train the model

Related topics