How to use label smoothing for single-label classification with Hugging Face models

I am training a binary classification model using the XLM-RoBERTa large model.
My training data has hard labels, each either 1 or 0.

Is it advisable to apply label smoothing when training with hard labels?
If so, what would be the right way to do it?

Here is my code:

# Load the tokenizer from its local directory.
tokenizer = tr.XLMRobertaTokenizer.from_pretrained(
    "/home/scp/AIML/tokenizer_xlm2"
)

# Encode each split with identical settings: truncate to at most 512 tokens,
# pad, and return PyTorch tensors.
train_encodings = tokenizer(
    train_texts,
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors="pt",
)
val_encodings = tokenizer(
    val_texts,
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors="pt",
)
test_encodings = tokenizer(
    test_texts,
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors="pt",
)

class SEDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with per-example labels.

    Args:
        encodings: Mapping from feature name to an indexable collection that
            holds one entry per example (tensors or plain sequences).
        labels: Indexable collection of labels aligned with ``encodings``.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_tensor(value):
        """Return ``value`` as a tensor that does not share storage with it."""
        if isinstance(value, torch.Tensor):
            # torch.tensor(<tensor>) emits a UserWarning on every call;
            # clone().detach() is the recommended way to copy a tensor.
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return a dict with every encoding feature at ``idx`` plus ``labels``."""
        item = {key: self._as_tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self._as_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples, taken from the labels."""
        return len(self.labels)

# Wrap each split's encodings and labels in a dataset for the Trainer.
train_data = SEDataset(train_encodings, train_labels)
val_data = SEDataset(val_encodings, val_labels)
# Named test_data (not tes_data) so the later trainer.predict(test_data) call resolves.
test_data = SEDataset(test_encodings, test_labels)

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 for an evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        Dict with the keys ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    logits, labels = eval_pred

    # The predicted class is the index of the largest logit on the last axis.
    predicted_classes = np.argmax(logits, axis=-1)

    n_correct = np.sum(predicted_classes == labels)
    scores = {"accuracy": n_correct / predicted_classes.shape[0]}

    scorers = (
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score),
        ('f1', metrics.f1_score),
    )
    for metric_name, scorer in scorers:
        scores[metric_name] = scorer(labels, predicted_classes)
    return scores

training_args = tr.TrainingArguments(
    # report_to='wandb',
    # Output and logging locations.
    output_dir='/home/scp/AIML/results_vocab_ext_exp1',
    overwrite_output_dir=True,
    logging_dir='./logs_exp1',
    logging_steps=6000,
    run_name="Exp1_XLM_ROBERAT_Multilingual WITH EXTENDED VOCAB",
    # Schedule and batch sizes.
    num_train_epochs=10,
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,
    gradient_accumulation_steps=20,
    # Optimizer settings.
    learning_rate=2e-5,
    warmup_steps=200,
    weight_decay=0.01,
    fp16=True,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)


# Two-class sequence classifier loaded from the local checkpoint directory.
model = tr.XLMRobertaForSequenceClassification.from_pretrained(
    "/home/scp/model_mlm_vocab_exp1_20epocs",
    problem_type="single_label_classification",
    num_labels=2,
    ignore_mismatched_sizes=True,
    id2label={0: 'negative', 1: 'positive'},
)


# Wire the model, arguments, datasets and metric callback into one Trainer.
trainer = tr.Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    compute_metrics=compute_metrics,
)
# Register the custom callback, then run training.
trainer.add_callback(CustomCallback(trainer))
trainer.train()

# Predict on the test split; the first element of the result is passed to
# softmax, and column 1 is kept as the score.
preds = trainer.predict(test_data)
score = softmax(preds[0], axis=1)[:, 1]

Do I also need to change the loss function in order to use label smoothing?

Hi pchhapolika,
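You can simply set label_smoothing_factor in your TrainingArguments (for example, label_smoothing_factor=0.1). The Trainer then applies the smoothing when it computes the loss, so you do not need to change the loss function yourself.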

1 Like