I was kind of exploring SegFormer, and this is the error I am getting:
ValueError: Target size (torch.Size([12, 1, 512, 512])) must be the same as input size (torch.Size([12, 1, 128, 128]))
My model is:
import torch
from torch import nn
from transformers import SegformerForSemanticSegmentation

class My_model(nn.Module):
    def __init__(self):
        super(My_model, self).__init__()
        # configuration = SegformerConfig.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
        # configuration.num_labels = 1  # set output as 1
        # self.model = SegformerForSemanticSegmentation(config=configuration)
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b0-finetuned-ade-512-512",
            num_labels=1,
            ignore_mismatched_sizes=True)  # checkpoint head has 150 labels, ours has 1

    def forward(self, image):
        img_segs = self.model(image)
        return img_segs.logits
My input size is 512, my batch size is 12, and my image shape is (3, 512, 512).
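For context, that exact ValueError is raised by PyTorch's binary_cross_entropy_with_logits when the target and input shapes differ, so the mismatch almost certainly happens in the loss computation (the training loop itself is not shown in the post, so this is an assumption about where the loss is computed). A minimal reproduction:

import torch
import torch.nn.functional as F

logits = torch.randn(12, 1, 128, 128)                   # model output at H/4 x W/4
masks = torch.randint(0, 2, (12, 1, 512, 512)).float()  # full-resolution target masks

# Raises: ValueError: Target size (torch.Size([12, 1, 512, 512])) must be
# the same as input size (torch.Size([12, 1, 128, 128]))
loss = F.binary_cross_entropy_with_logits(logits, masks)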
@nielsr
What I think is that model(input).logits comes out at H/4 and W/4, but what if I want a mask of shape (B, C, H, W)? How do I upsample from there?
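That intuition can be checked directly: SegFormer's decode head produces logits at 1/4 of the input height and width, which is where 128 = 512/4 comes from. A quick sketch with a dummy batch (the num_labels=1 and ignore_mismatched_sizes settings mirror the model above):

import torch
from transformers import SegformerForSemanticSegmentation

model = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-ade-512-512",
    num_labels=1,
    ignore_mismatched_sizes=True)  # pretrained head has 150 ADE20K labels

with torch.no_grad():
    out = model(torch.randn(2, 3, 512, 512))
print(out.logits.shape)  # torch.Size([2, 1, 128, 128]) -- H/4 x W/4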
nielsr (July 19, 2022, 8:15am):
Upsampling can be done using torch.nn.functional.interpolate (assuming image is a Pillow image):

from torch import nn

# rescale logits to original image size
logits = nn.functional.interpolate(outputs.logits.detach().cpu(),
                                   size=image.size[::-1],  # (height, width)
                                   mode='bilinear',
                                   align_corners=False)
I’ve taken the code snippet above from my SegFormer inference notebook, which you can find here.
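Put together, inference on a single image could look like the following. This is a minimal sketch; the image path and the use of SegformerImageProcessor are my assumptions, not part of the thread:

from PIL import Image
import torch
from torch import nn
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

image = Image.open("example.jpg")  # hypothetical input image
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# rescale logits to the original image size, then take the per-pixel argmax
logits = nn.functional.interpolate(outputs.logits,
                                   size=image.size[::-1],  # (height, width)
                                   mode='bilinear',
                                   align_corners=False)
segmentation = logits.argmax(dim=1)[0]  # (height, width) map of class ids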
import torch
from torch import nn
from transformers import SegformerForSemanticSegmentation

class my_model(nn.Module):
    def __init__(self):
        super(my_model, self).__init__()
        # configuration = SegformerConfig.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
        # configuration.num_labels = 1  # set output as 1
        # self.model = SegformerForSemanticSegmentation(config=configuration)
        self.model = SegformerForSemanticSegmentation.from_pretrained(
            "../input/nvidia-segformer-b2-finetuned-ade-512-512/segformer-b2-finetuned-ade-512-512",
            num_labels=1,
            ignore_mismatched_sizes=True)

    def forward(self, image):
        img_segs = self.model(image)
        # SegFormer logits come out at H/4 x W/4, so upsample back to the input resolution
        upsampled_logits = nn.functional.interpolate(img_segs.logits,
                                                     scale_factor=4,
                                                     mode='bilinear',
                                                     align_corners=False)
        return torch.sigmoid(upsampled_logits)
Actually, I did this and it worked for me.
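To confirm the shapes now line up, here is a dummy training step (illustrative only; the real data loading is not shown in the thread). scale_factor=4 works here because SegFormer's logits are exactly 1/4 of the input resolution; passing size=(512, 512) would be equivalent. Note that because forward() already applies sigmoid, the matching loss is nn.BCELoss rather than BCEWithLogitsLoss:

import torch
from torch import nn

model = my_model()
criterion = nn.BCELoss()  # forward() already applies sigmoid

images = torch.randn(12, 3, 512, 512)                   # dummy batch matching the post's setup
masks = torch.randint(0, 2, (12, 1, 512, 512)).float()  # dummy binary target masks

preds = model(images)           # (12, 1, 512, 512) after the scale_factor=4 upsampling
loss = criterion(preds, masks)  # shapes match, so no ValueError
loss.backward()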