I am trying to train Mask2Former with CUDA enabled, but I am encountering the following error: “RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same.” However, it works fine when CUDA is disabled. Can someone please help me fix this issue? Thank you in advance!
config = Mask2FormerConfig(feature_size=CFG.img_size[0], mask_feature_size=CFG.img_size[0])
image_processor = Mask2FormerImageProcessor(config)
model = Mask2FormerForUniversalSegmentation(config).to(device)
criterion = nn.CrossEntropyLoss()
torch.cuda.empty_cache()
epochs = 1000
scaler = GradScaler()
valid_fold = 3
train_df = df[df['fold'] != valid_fold]
valid_df = df[df['fold'] == valid_fold]
train_dataset = HubmapDataset(train_df, config)
valid_dataset = HubmapDataset(valid_df, config)
train_dataloader = DataLoader(train_dataset, batch_size=CFG.batch_size, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=2 * CFG.batch_size, shuffle=False)
optimizer, scheduler = get_optimizer_and_scheduler(model, train_dataloader, "adamw")
for epoch in range(epochs):
model.train()
total_train_loss = 0
total_test_loss = 0
pbar = tqdm(train_dataloader, desc=f"Train: Epoch {epoch + 1}", total=len(train_dataloader), mininterval=5)
for inputs in pbar:
optimizer.zero_grad()
imgs = inputs[0]
imgs = image_processor(list(imgs), return_tensors="pt", size=(512,512))
#imgs = imgs.to(device)
ings = image_processor.encode_inputs(pixel_values_list=imgs['pixel_values'],
task_inputs=['instance'],
segmentation_maps=inputs[1],
ignore_index=0,
return_tensors='pt')
ings = ings.convert_to_tensors()
for k in ings.keys():
try:
ings[k] = ings[k].to(device)
except AttributeError:
ings[k] = torch.stack(ings[k]).to(device)
outputs = model(**ings)
loss = outputs.loss
scaler.scale(loss).backward()
scaler.step(optimizer)
scale = scaler.get_scale()
scaler.update()
skip_lr_scheduler = scale > scaler.get_scale()
if scheduler is not None and not skip_lr_scheduler:
scheduler.step()
lr = scheduler.get_last_lr()[0] if scheduler else CFG.one_cycle_max_lr
loss = loss.item()
pbar.set_postfix({"loss": loss, "lr": lr})
total_train_loss += loss
total_train_loss /= len(train_dataloader)
model.eval()
pbar = tqdm(valid_dataloader, desc=f"Validation: Epoch {epoch + 1}", total=len(valid_dataloader), mininterval=5)
for inputs in pbar:
imgs = inputs[0]
imgs = image_processor(list(imgs), return_tensors="pt", size=(512,512))
#imgs = imgs.to(device)
ings = image_processor.encode_inputs(pixel_values_list=imgs['pixel_values'],
task_inputs=['instance'],
segmentation_maps=inputs[1],
ignore_index=0,
return_tensors='pt')
ings = ings.convert_to_tensors()
outputs = model(inputs)
loss = outputs.item()
pbar.set_postfix({"loss": loss})
total_test_loss += loss
total_test_loss /= len(valid_dataloader)
print(f'TOTAL TRAIN LOSS: {total_train_loss} | TOTAL VALID LOSS: {total_test_loss}')