ValueError - number of spatial dimensions

Hello everyone !

First topic and first post for me, so if I miss some explanation, I will update this as soon as possible.

I am fine-tuning a MaskFormer model with a custom COCO dataset. I have an issue when I run the forward call to get the loss (function get_loss_dict).

Some of my pictures do not raise any error, and some of them raise this error. I don't understand why.

Here the code of the dataset:

class CocoInstanceDataset(Dataset):
    """Instance-segmentation dataset over a COCO-format annotation file.

    NOTE(review): the pasted source was truncated mid-line in several places
    (base class, image loading, mask collection, processor aggregation).
    The missing pieces below are reconstructed to the most plausible intent
    and should be verified against the original code.

    Parameters
    ----------
    ann_file : str
        Path to a COCO-format JSON annotation file.
    img_folder : str
        Directory containing the images referenced by the annotations.
    processor : callable, optional
        HuggingFace image processor (presumably MaskFormerImageProcessor)
        that builds model inputs from the image and per-instance masks.
    transform : callable, optional
        Albumentations-style transform taking image=/masks= keyword args.
    increment_instance_ids : bool
        When True (default), instance ids start at 1 so 0 stays background.
    increment_class_ids : bool
        When True (default), category ids are shifted by 1.
    """

    def __init__(self, ann_file, img_folder, processor=None, transform=None,
                 increment_instance_ids: bool = True, increment_class_ids: bool = True):
        with open(ann_file, 'r') as f:
            self.coco = json.load(f)
        # Sort so the idx -> image mapping is deterministic across runs.
        self.coco['images'] = sorted(self.coco['images'], key=lambda x: x['id'])
        self.img_folder = img_folder
        self.processor = processor
        self.transform = transform
        # These bools are used as 0/1 offsets below.
        self.increment_instance_ids = increment_instance_ids
        self.increment_class_ids = increment_class_ids
        self.id2label = {cat['id']: cat['name'] for cat in self.coco['categories']}
        self.label2id = {cat['name']: cat['id'] for cat in self.coco['categories']}

    def get_mask_from_segmentation(self, segmentation, instance_id, width, height):
        """Rasterize a COCO polygon into an int64 mask labelled with instance_id.

        Returns None for empty (all-zero) masks so the caller can drop them.

        BUG FIX: the original returned a 0/1 mask and never used instance_id,
        but the processor's instance_id_to_semantic_id mapping is keyed by
        instance id, so the mask pixels must carry that id.
        """
        import imantics  # third-party; imported locally so the module loads without it
        mask = imantics.Polygons(segmentation).mask(width=width, height=height).array.astype(np.int64)
        if mask.sum() > 0:
            return mask * instance_id
        return None  # explicit: empty masks are skipped by the caller

    def __getitem__(self, idx):
        from PIL import Image  # local import keeps the module importable without PIL

        # --- image ---
        img_info = self.coco['images'][idx]
        img_path = os.path.join(self.img_folder, img_info['file_name'])
        image = np.array(Image.open(img_path).convert('RGB'))  # H, W, C
        height, width = image.shape[:2]

        # --- annotations belonging to this image ---
        annotations = [ann for ann in self.coco['annotations']
                       if ann['image_id'] == img_info['id']]
        masks = []
        for i, ann in enumerate(annotations):
            mask = self.get_mask_from_segmentation(
                segmentation=ann['segmentation'],
                instance_id=i + self.increment_instance_ids,
                width=width, height=height)
            if mask is not None:
                masks.append(mask)
        if not masks:
            # Clearer failure than the obscure torch.stack([]) error the
            # original would have produced for annotation-less images.
            raise ValueError(f"image {img_info['file_name']} has no non-empty masks")

        # instance id -> (possibly shifted) category id, as the processor expects
        instance_id_2_category_id = {
            i + self.increment_instance_ids: ann['category_id'] + self.increment_class_ids
            for i, ann in enumerate(annotations)}

        # --- optional augmentation (albumentations-style interface) ---
        if self.transform is not None:
            transformed = self.transform(image=image, masks=masks)
            image, masks = transformed['image'], transformed['masks']

        # --- run the processor once per mask so overlapping instances survive ---
        pixel_values, mask_labels, class_labels = None, [], []
        for i, mask in enumerate(masks):
            encoding = self.processor(images=[image], segmentation_maps=[mask],
                                      instance_id_to_semantic_id=instance_id_2_category_id,
                                      return_tensors="pt")
            if i == 0:
                # pixel_values is identical for every call; keep the first one
                pixel_values = encoding['pixel_values']
            mask_labels.extend(encoding['mask_labels'])
            class_labels.extend(encoding['class_labels'])

        # BUG FIX: the original used a bare .squeeze(), which also drops the
        # spatial/instance axis when exactly one mask is present — the most
        # plausible cause of the "number of spatial dimensions" error that
        # only some images trigger. squeeze(dim=...) removes only the known
        # singleton axis regardless of how many instances there are.
        inputs = {
            'pixel_values': pixel_values.squeeze(0),                 # (C, H, W)
            'mask_labels': torch.stack(mask_labels).squeeze(dim=1),  # (N, H, W), safe for N == 1
            'class_labels': torch.stack(class_labels).squeeze(dim=1) # (N,)
        }
        return inputs

    def __len__(self):
        return len(self.coco['images'])

Below is the model call used to get the loss:

outputs = model(

And below is the error trace: