This is my code:
class MultilabelTrainer(Trainer):
    """Trainer subclass that scores each batch with a class-weighted NLL loss.

    The model is expected to emit log-probabilities (e.g. the output of
    ``nn.LogSoftmax``); the per-class weights are read from the module-level
    ``class_weights`` object.
    """

    def compute_loss(self, model, inputs, return_outputs=False,
                     num_items_in_batch=None):
        """Compute the weighted negative log-likelihood for one batch.

        Args:
            model: The module being trained; called as ``model(**inputs)``
                after the labels have been removed from ``inputs``.
            inputs: Batch dict; must contain a ``'labels'`` entry.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted (and ignored) because newer
                Transformers releases pass it as a keyword argument.

        Returns:
            The scalar loss, or ``(loss, outputs)`` where ``outputs`` is a
            dict with a ``'logits'`` entry.
        """
        labels = inputs.pop('labels')
        outputs = model(**inputs)

        # Normalise whatever the model returned.  A bare tensor is wrapped in
        # a dict because Trainer's evaluation step treats non-dict outputs as
        # a (loss, logits, ...) tuple and slices off element 0, which for a
        # tensor would silently drop the first sample of every batch.
        if torch.is_tensor(outputs):
            log_probs = outputs
            outputs = {'logits': log_probs}
        elif isinstance(outputs, dict):
            log_probs = outputs['logits']
        else:
            log_probs = outputs[0]

        # Keep the weights on the same device/dtype as the predictions so the
        # loss does not fail when the model lives on a different device than
        # the one the weights were created on.
        weights = torch.as_tensor(class_weights,
                                  dtype=log_probs.dtype,
                                  device=log_probs.device)
        criterion = nn.NLLLoss(weight=weights)
        loss = criterion(log_probs, labels)
        return (loss, outputs) if return_outputs else loss
class BertArticleClassifier(nn.Module):
    """Pretrained encoder followed by a two-layer classification head.

    The head is dropout -> linear -> LeakyReLU -> linear -> log-softmax, so
    ``forward`` returns log-probabilities suitable for ``nn.NLLLoss``.
    """

    def __init__(self, n_classes, freeze_bert_weights=False, *,
                 pretrained_model_name='bert-base-uncased',
                 hidden_dim=256, dropout_prob=0.1):
        """Build the encoder and the classification head.

        Args:
            n_classes: Number of output classes.
            freeze_bert_weights: When true, the encoder parameters are
                excluded from gradient updates; only the head is trained.
            pretrained_model_name: Checkpoint handed to
                ``AutoModel.from_pretrained``.
            hidden_dim: Width of the intermediate linear layer.
            dropout_prob: Dropout probability applied to the pooled output.
        """
        super().__init__()
        self.bert = AutoModel.from_pretrained(pretrained_model_name)
        if freeze_bert_weights:
            for param in self.bert.parameters():
                param.requires_grad = False
        self.dropout = nn.Dropout(dropout_prob)
        # Take the encoder width from its config instead of hard-coding 768,
        # so other checkpoints work without editing the head.
        self.fc_1 = nn.Linear(self.bert.config.hidden_size, hidden_dim)
        self.leaky_relu = nn.LeakyReLU()
        self.fc_out = nn.Linear(hidden_dim, n_classes)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_ids, attention_mask):
        """Return per-class log-probabilities for a batch.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            The log-softmax (over dim 1) of the head's output.
        """
        # Keyword arguments so the mask cannot be bound to another positional
        # parameter if a different encoder class is loaded.
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.dropout(encoded['pooler_output'])
        hidden = self.leaky_relu(self.fc_1(pooled))
        return self.log_softmax(self.fc_out(hidden))
class ArticleDataset(Dataset):
    """Dataset exposing input ids, attention masks and labels by index."""

    def __init__(self, input_ids, attention_mask, labels):
        """Store the three indexable containers without copying them."""
        super().__init__()
        self.input_ids, self.attention_mask, self.labels = (
            input_ids, attention_mask, labels)

    def __len__(self):
        """Return the number of examples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of the two inputs and its label."""
        sample = {'input_ids': self.input_ids[idx]}
        sample['attention_mask'] = self.attention_mask[idx]
        sample['labels'] = self.labels[idx]
        return sample
# Build the datasets.  Labels are converted through NumPy with an explicit
# integer dtype: nn.NLLLoss needs int64 class indices, and building a tensor
# straight from a Series can index it by label rather than by position.
# NOTE(review): assumes df_train / df_valid are pandas DataFrames - confirm.
train_dataset = ArticleDataset(
    encoded_data_train['input_ids'],
    encoded_data_train['attention_mask'],
    torch.tensor(df_train['label'].to_numpy(), dtype=torch.long),
)
val_dataset = ArticleDataset(
    encoded_data_val['input_ids'],
    encoded_data_val['attention_mask'],
    torch.tensor(df_valid['label'].to_numpy(), dtype=torch.long),
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BertArticleClassifier(n_classes=len(label_dict), freeze_bert_weights=False)

# 1e-4 is above the usual 2e-5..5e-5 range for fine-tuning all BERT weights
# and tends to collapse the model to a near-uniform prediction, so use 2e-5.
# NOTE(review): with a custom optimizer, TrainingArguments.weight_decay is
# not applied by Trainer; pass weight_decay here if it is wanted.
optimizer = AdamW(model.parameters(),
                  lr=2e-5,
                  eps=1e-6)

class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
criterion = nn.NLLLoss(weight=class_weights)

training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='epoch',
    num_train_epochs=4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    save_total_limit=1,
)

# No hand-built scheduler: the previous one was sized as 100 * EPOCHS steps,
# which is unrelated to the number of optimizer steps Trainer performs
# (num_train_epochs x batches per epoch).  A linear schedule that runs out
# early holds the learning rate at 0 for the rest of training.  Passing None
# lets Trainer create its default linear schedule with the correct length.
trainer = MultilabelTrainer(model=model,
                            args=training_args,
                            train_dataset=train_dataset,
                            eval_dataset=val_dataset,
                            compute_metrics=compute_metrics,
                            optimizers=(optimizer, None))
trainer.train()
encoded_data_train and encoded_data_val are created using BertTokenizer.
The problem is that the model was learning when I was not using the Trainer class, but I needed to tweak the loss function to accept class weights (my dataset is a bit unbalanced), so I decided to override the loss function in the Trainer class.
The model does not learn at all, whether I use my own Trainer subclass or the default Trainer: after every epoch the model’s loss is always ~3.3.
If there is a way to use class weights without the Trainer, I would be happy to know about it. In any case, if you have any advice or help, I’d be happy to hear it.