Hello everyone. I'm trying to fine-tune DistilBertForSequenceClassification on my custom dataset for binary classification, using Colab. The first time I fine-tuned the model, it gave me good accuracy on my test set. However, the second time I fine-tuned it (in order to save the model), the accuracy on the same test set dropped a lot. I tried over and over again and nothing changed. Can anyone help me understand what's going on?
Here is the code:
```python
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
from torch.utils.data import Dataset
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=40
)

checkpoint = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
model = DistilBertForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
    problem_type='single_label_classification',
    id2label=id2label,    # id2label / label2id are defined earlier (not shown)
    label2id=label2id,
)
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=160):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = torch.tensor(self.labels[idx])
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': label,
        }
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(1)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=1
    )
    acc = accuracy_score(labels, preds)
    return {
        'ACC': acc,
        'f1': f1,
        'Precision': precision,
        'Recall': recall,
    }
```
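For completeness, the `train_dataset` / `test_dataset` passed to the `Trainer` below are built from the split above, roughly like this (a minimal sketch of the equivalent construction, which I left out of the original snippet):

```python
# Build the PyTorch datasets from the split above (minimal sketch;
# the names match the train_dataset / test_dataset used by the Trainer below).
train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
test_dataset = CustomDataset(test_texts, test_labels, tokenizer)
```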
```python
# training
from transformers import TrainingArguments, Trainer
args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    save_steps=200,
    logging_steps=172,
    evaluation_strategy="epoch",
    save_total_limit=1,  # keep only the most recent checkpoint; older ones are deleted
)
trainer2 = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
from transformers.trainer_callback import TrainerCallback
from copy import deepcopy

class CustomCallback(TrainerCallback):
    """Also evaluate on the training set at the end of each epoch."""

    def __init__(self, trainer) -> None:
        super().__init__()
        self._trainer = trainer

    def on_epoch_end(self, args, state, control, **kwargs):
        if control.should_evaluate:
            # evaluate() may mutate `control`, so return a copy of the
            # original to keep the regular eval-set evaluation scheduled
            control_copy = deepcopy(control)
            self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train")
            return control_copy
trainer = Trainer(
    model=model,                      # the instantiated Transformers model to be trained
    args=args,                        # training arguments, defined above
    train_dataset=train_dataset,      # training dataset
    eval_dataset=test_dataset,        # evaluation dataset
    compute_metrics=compute_metrics,  # the function that computes metrics of interest
    tokenizer=tokenizer,
)
trainer.add_callback(CustomCallback(trainer))
train_result = trainer.train()
```
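One thing I'm unsure about: should a repeated run reload a fresh copy of the pretrained weights (and fix the seed) instead of calling `train()` again on the already fine-tuned `model` object? Here is a minimal sketch of what I mean; the seed value and the save path are placeholders, not my actual settings:

```python
from transformers import set_seed

set_seed(42)  # placeholder seed, just to make runs comparable

# Reload a fresh copy of the pretrained weights instead of reusing the
# model object that was already fine-tuned earlier in this session.
model = DistilBertForSequenceClassification.from_pretrained(
    checkpoint, num_labels=2, id2label=id2label, label2id=label2id
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()
trainer.save_model('./results/final')  # placeholder save path
```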