I am trying to subclass Hugging Face's Trainer and override its optimizer and lr_scheduler:
from transformers import TrainingArguments

training_args = TrainingArguments(
    "rugpt3-headers",
    num_train_epochs=3,
    per_device_train_batch_size=3,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
from transformers import Trainer, AdamW, EarlyStoppingCallback
from torch.optim.lr_scheduler import MultiStepLR

class MyTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def create_optimizer(self):
        self.optimizer = AdamW(model.parameters(), lr=1e-7)

    def create_scheduler(self, num_training_steps):
        self.lr_scheduler = MultiStepLR(self.optimizer, milestones=[1, 3], gamma=0.5)
trainer = MyTrainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)
trainer.train()
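In case the hook signatures matter here, my understanding (from reading the Trainer source of the transformers version I have installed; the exact details may differ between releases) is that during train() the base class invokes the two overrides roughly like this:

# Not my code: a rough paraphrase of what I believe transformers' Trainer does
# internally before the training loop starts (version-dependent).
def create_optimizer_and_scheduler(self, num_training_steps):
    self.create_optimizer()
    self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)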
Even though I did not specify learning_rate in TrainingArguments, it has a default value of 5e-05. My attempt to overwrite the optimizer and scheduler is not successful because of that. After training completed, I used tensorboard to check which learning rate was used, and it is still 5e-05, even though I thought I had overwritten it.
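Besides tensorboard, this is the quick check I can run after training finishes (it assumes the trainer object from the code above; trainer.state.log_history holds the same logged values that tensorboard displays):

# Inspect what the Trainer actually used (run after trainer.train()).
print(trainer.args.learning_rate)               # learning rate from TrainingArguments (the 5e-05 default here)
print(trainer.optimizer.param_groups[0]["lr"])  # lr of the optimizer the Trainer actually built
for entry in trainer.state.log_history:
    if "learning_rate" in entry:
        print(entry.get("step"), entry["learning_rate"])  # values that end up in tensorboard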
How can I overcome this issue? I want training to use the learning rate set up in the create_optimizer and create_scheduler methods I wrote above and ignore the default learning rate of TrainingArguments.
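For completeness, the fallback I am aware of is building the optimizer and scheduler myself and handing them to the stock Trainer through its optimizers argument (a sketch, reusing the objects defined above), but I would still like to understand why the subclassing approach is ignored:

optimizer = AdamW(model.parameters(), lr=1e-7)
scheduler = MultiStepLR(optimizer, milestones=[1, 3], gamma=0.5)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    optimizers=(optimizer, scheduler),  # bypasses create_optimizer/create_scheduler
)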