@lucasresck, as per your suggestion, I did the following:
import torch.nn as nn
import numpy as np

# Per-step training losses, one entry per mini-batch.
train_losses = []
num_mb_train = len(train_dataloader)  # mini-batches per epoch
total_preds = []

# Hoisted out of the loops: the loss module is stateless, so building it
# once per run avoids re-allocating it on every batch.
criterion = torch.nn.CrossEntropyLoss(weight=weights, reduction='mean')

for epoch in range(num_epochs):
    model.train()       # once per epoch is enough; it only flips a mode flag
    train_loss = 0.0    # running sum of batch losses for this epoch

    for step, batch in enumerate(train_dataloader):
        # Move every tensor in the batch to the target device.
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_token_type, b_labels = batch

        outputs = model(b_input_ids,
                        token_type_ids=b_token_type,
                        attention_mask=b_input_mask,
                        labels=b_labels)

        # Recompute the loss from the logits (outputs[1]) with class
        # weights, instead of using the model's own unweighted loss
        # (outputs[0]).  NOTE(review): assumes outputs[1] holds the raw
        # logits — true for HF-style models called with labels; confirm.
        loss = criterion(outputs[1], b_labels)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        # BUG FIX: the original did `train_loss = loss.item()`, which
        # overwrote the epoch total on every step; accumulate instead.
        train_loss += loss.item()
        train_losses.append(loss.item())  # per-step loss, as before

        if step % 50 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, step + 1, total_steps,
                          loss.item()))

    # Epoch-level average loss — this is what num_mb_train was computed for.
    print('Epoch [{}/{}] mean train loss: {:.4f}'
          .format(epoch + 1, num_epochs, train_loss / num_mb_train))
Am I doing this the right way, or not?