Why doesn't my model learn anything?

This is my code:

import torch
import torch.nn as nn
from torch.optim import AdamW  # or transformers' AdamW, the call below works with either
from torch.utils.data import Dataset
from transformers import (AutoModel, Trainer, TrainingArguments,
                          get_linear_schedule_with_warmup)


class MultilabelTrainer(Trainer):

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop('labels')
        outputs = model(**inputs)
        # class_weights here is the global tensor defined further down
        criterion = nn.NLLLoss(weight=class_weights)
        loss = criterion(outputs, labels)
        return (loss, outputs) if return_outputs else loss

class BertArticleClassifier(nn.Module):
    def __init__(self, n_classes, freeze_bert_weights=False):
        super(BertArticleClassifier, self).__init__()

        self.bert = AutoModel.from_pretrained('bert-base-uncased')

        if freeze_bert_weights:
            for param in self.bert.parameters():
                param.requires_grad = False

        self.dropout = nn.Dropout(0.1)
        self.fc_1 = nn.Linear(768, 256)
        self.leaky_relu = nn.LeakyReLU()
        self.fc_out = nn.Linear(256, n_classes)
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_ids, attention_mask):
        output = self.bert(input_ids, attention_mask)
        pooled = self.dropout(output['pooler_output'])
        hidden = self.leaky_relu(self.fc_1(pooled))
        return self.log_softmax(self.fc_out(hidden))

class ArticleDataset(Dataset):

    def __init__(self, input_ids, attention_mask, labels):
        super(ArticleDataset, self).__init__()

        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {'input_ids': self.input_ids[idx],
                'attention_mask': self.attention_mask[idx],
                'labels': self.labels[idx]}


train_dataset = ArticleDataset(encoded_data_train['input_ids'], encoded_data_train['attention_mask'], torch.tensor(df_train['label']))
val_dataset = ArticleDataset(encoded_data_val['input_ids'], encoded_data_val['attention_mask'], torch.tensor(df_valid['label']))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BertArticleClassifier(n_classes=len(label_dict), freeze_bert_weights=False)
optimizer = AdamW(model.parameters(),
                  lr=1e-4,
                  eps=1e-6)

class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
criterion = nn.NLLLoss(weight=class_weights)

scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=100 * EPOCHS)

training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='epoch',
    num_train_epochs=4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    save_total_limit=1,
)

trainer = MultilabelTrainer(model=model, 
                            args=training_args,  
                            train_dataset=train_dataset,  
                            eval_dataset=val_dataset,
                            compute_metrics=compute_metrics,
                            optimizers=[optimizer, scheduler])

trainer.train()

encoded_data_train and encoded_data_val are created using BertTokenizer.
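Something along these lines (the 'text' column name and max_length here are just illustrative, not copied from my actual preprocessing):

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# 'text' and max_length=256 are placeholders for the real column/setting
encoded_data_train = tokenizer(list(df_train['text']),
                               padding=True,
                               truncation=True,
                               max_length=256,
                               return_tensors='pt')
encoded_data_val = tokenizer(list(df_valid['text']),
                             padding=True,
                             truncation=True,
                             max_length=256,
                             return_tensors='pt')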
The problem is that the model was learning fine when I was not using the Trainer class, but I needed to tweak the loss function to accept class weights (my dataset is a bit imbalanced), so I decided to just override the loss function in the Trainer class.
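A simplified sketch of that earlier, pre-Trainer loop, reusing the same model/optimizer/scheduler setup as above (the sklearn weight computation and the batch size are illustrative, details in my actual script differ slightly):

import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import DataLoader

# Per-class weights from the label distribution (sklearn shown here as one
# way to get them; the exact weighting scheme isn't the point)
class_weights = compute_class_weight(class_weight='balanced',
                                     classes=np.unique(df_train['label']),
                                     y=df_train['label'])
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)
criterion = nn.NLLLoss(weight=class_weights)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
model.to(device)

for epoch in range(EPOCHS):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        log_probs = model(input_ids, attention_mask)  # model outputs log-softmax
        loss = criterion(log_probs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()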
Now I get no learning at all, whether I use my own Trainer subclass or the default Trainer, and after every epoch my model's loss is always ~3.3.
If there is a way to use class weights without needing the Trainer, I would be happy to know it. In any case, any advice or help would be appreciated.
