I'm training an image segmentation model with Mask2Former, but I run into an error when using `AutoImageProcessor`. My code, the full error output, and my environment details are included below. My input images and masks are both 8-bit RGB images of 512×512 pixels. Adding `padding=True` to the processor call does not solve the problem.
My code:
‘’’
import os
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation, Trainer, TrainingArguments
import torch
from PIL import Image
import numpy as np
# Directory that TrainingArguments uses as its output_dir.
local_save_path = "./saved_models/m2f/"
# Checkpoint identifier shared by the model and its image processor so that
# both are loaded from the same pretrained configuration.
model_name = "facebook/mask2former-swin-large-coco-instance"
model = Mask2FormerForUniversalSegmentation.from_pretrained(model_name)
processor = AutoImageProcessor.from_pretrained(model_name)
class SegmentationDataset:
    """Map-style dataset that pairs image files with segmentation-mask files.

    The file names in ``image_dir`` and ``mask_dir`` are each sorted, and the
    image at position ``i`` is paired with the mask at position ``i``; the two
    directories are therefore expected to list matching files in the same
    order.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the segmentation masks.
        processor: Image processor called as
            ``processor(images=..., segmentation_maps=..., return_tensors="pt")``.
            Its result is assumed to follow the Mask2FormerImageProcessor
            layout (``pixel_values``, ``pixel_mask``, ``mask_labels``,
            ``class_labels``).
        image_size: ``(width, height)`` every image and mask is resized to
            before being handed to the processor.
    """

    def __init__(self, image_dir, mask_dir, processor, image_size=(512, 512)):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        # Sorted so that images[i] and masks[i] refer to the same sample.
        self.images = sorted(os.listdir(image_dir))
        self.masks = sorted(os.listdir(mask_dir))
        self.processor = processor
        self.image_size = image_size

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load, resize and preprocess the sample at position ``idx``.

        Returns:
            A dict with the un-batched entries ``pixel_values``,
            ``pixel_mask``, ``mask_labels`` and ``class_labels`` for this
            single sample.
        """
        image_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        image = Image.open(image_path).convert("RGB").resize(self.image_size)
        # Nearest-neighbour keeps the mask's label values intact; an
        # interpolating filter would blend neighbouring labels into values
        # that correspond to no real class.
        mask = (
            Image.open(mask_path)
            .convert("L")
            .resize(self.image_size, resample=Image.NEAREST)
        )

        image = np.array(image, dtype=np.float32)
        mask = np.array(mask, dtype=np.float32)
        print(f"Image shape: {image.shape}")
        print(f"Mask shape: {mask.shape}")

        encoded_inputs = self.processor(
            images=image,
            segmentation_maps=mask,
            return_tensors="pt",
        )

        # The processor was given one image, so every entry carries a leading
        # batch axis (tensors) or is a one-element list (labels). Strip that
        # here; batching is the collate function's job.
        sample = {
            "pixel_values": encoded_inputs["pixel_values"][0],
            "pixel_mask": encoded_inputs["pixel_mask"][0],
            "mask_labels": encoded_inputs["mask_labels"][0],
            "class_labels": encoded_inputs["class_labels"][0],
        }
        print(f"Image shape after processing: {sample['pixel_values'].shape}")
        print(f"Mask shape after processing: {sample['mask_labels'].shape}")
        return sample
def collate_fn(batch):
    """Combine per-sample dicts into one Mask2Former training batch.

    Each sample is expected to be a mapping holding un-batched tensors under
    ``pixel_values``, ``pixel_mask``, ``mask_labels`` and ``class_labels``.

    Args:
        batch: List of per-sample mappings produced by the dataset.

    Returns:
        A dict whose image entries are stacked along a new batch axis and
        whose label entries are lists with one tensor per sample.
    """
    return {
        "pixel_values": torch.stack([sample["pixel_values"] for sample in batch]),
        "pixel_mask": torch.stack([sample["pixel_mask"] for sample in batch]),
        # The number of masks can differ between samples, so the labels are
        # kept as per-sample lists rather than stacked into one tensor.
        "mask_labels": [sample["mask_labels"] for sample in batch],
        "class_labels": [sample["class_labels"] for sample in batch],
    }


# Initialize datasets
train_dataset = SegmentationDataset(
    image_dir="/mnt/data1/pancreas/exp3/debug_dataset/images/train",
    mask_dir="/mnt/data1/pancreas/exp3/debug_dataset/masks/train",
    processor=processor,
)
val_dataset = SegmentationDataset(
    image_dir="/mnt/data1/pancreas/exp3/debug_dataset/images/val",
    mask_dir="/mnt/data1/pancreas/exp3/debug_dataset/masks/val",
    processor=processor,
)

# Training arguments
training_args = TrainingArguments(
    output_dir=local_save_path,
    eval_strategy="epoch",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=10,
    save_steps=1000,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=100,
    dataloader_drop_last=True,  # Drops last incomplete batch if present
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # Explicit collator: the label entries are variable-length per sample and
    # must reach the model as lists, not as one stacked tensor.
    data_collator=collate_fn,
)

# Train the model
trainer.train()
‘’’
And the output/error message:
/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/utils/deprecation.py:165: UserWarning: The following named arguments are not valid for Mask2FormerImageProcessor.__init__
and were ignored: ‘_max_size’
return func(*args, **kwargs)
0%| | 0/2500 [00:00<?, ?it/s]Image shape: (512, 512, 3)
Mask shape: (512, 512)
Traceback (most recent call last):
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 193, in convert_to_tensors
tensor = as_tensor(value)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 150, in as_tensor
return torch.from_numpy(value)
TypeError: expected np.ndarray (got numpy.ndarray)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File “/mnt/home/pycharm/huggingface/train.py”, line 88, in
trainer.train()
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/trainer.py”, line 2123, in train
return inner_training_loop(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/trainer.py”, line 2427, in _inner_training_loop
batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/trainer.py”, line 5045, in get_batch_samples
batch_samples += [next(epoch_iterator)]
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/accelerate/data_loader.py”, line 552, in iter
current_batch = next(dataloader_iter)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/dataloader.py”, line 701, in next
data = self._next_data()
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/dataloader.py”, line 757, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py”, line 52, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py”, line 52, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File “/mnt/home/pycharm/huggingface/train.py”, line 40, in getitem
encoded_inputs = self.processor(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 573, in call
return self.preprocess(images, segmentation_maps=segmentation_maps, **kwargs)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/utils/deprecation.py”, line 165, in wrapped_func
return func(*args, **kwargs)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/utils/generic.py”, line 852, in wrapper
return func(*args, **valid_kwargs)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 768, in preprocess
encoded_inputs = self.encode_inputs(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 931, in encode_inputs
encoded_inputs = self.pad(
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/models/mask2former/image_processing_mask2former.py”, line 861, in pad
return BatchFeature(data=data, tensor_type=return_tensors)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 79, in init
self.convert_to_tensors(tensor_type=tensor_type)
File “/home/ojdeboer/anaconda3/envs/hf/lib/python3.10/site-packages/transformers/feature_extraction_utils.py”, line 199, in convert_to_tensors
raise ValueError(
ValueError: Unable to create tensor, you should probably activate padding with ‘padding=True’ to have batched tensors with the same length.
0%| | 0/2500 [00:00<?, ?it/s]
Process finished with exit code 1
My transformers environment:
- `transformers` version: 4.46.2
- Platform: Linux-6.8.0-48-generic-x86_64-with-glibc2.35
- Python version: 3.10.15
- Huggingface_hub version: 0.24.6
- Safetensors version: 0.4.5
- Accelerate version: 1.1.1
- Accelerate config: not found
- PyTorch version (GPU?): 2.5.1 (True)
- Tensorflow version (GPU?): not installed (NA)
- Flax version (CPU?/GPU?/TPU?): not installed (NA)
- Jax version: not installed
- JaxLib version: not installed
- Using distributed or parallel set-up in script?:
- Using GPU in script?:
- GPU type: NVIDIA GeForce RTX 3090
I suspect this issue might be due to the shape of the processed data or the need for padding. Should I modify the processor or handle padding differently to enable batched tensors? Thank you very much in advance for any advice!