How can I use torch.optim.lr_scheduler.MultiStepLR with Trainer?

Is there any way to change the learning rate scheduler to PyTorch's MultiStepLR when using Trainer?

You can pass your own optimizer and scheduler to the Trainer. See the documentation for more information.
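For example, a minimal sketch (assuming model and training_args are already defined elsewhere; the optimizers argument of Trainer takes a tuple of (optimizer, lr_scheduler)):

from torch.optim import AdamW
from torch.optim.lr_scheduler import MultiStepLR
from transformers import Trainer

# Build the optimizer first; the scheduler takes it as its first argument.
optimizer = AdamW(model.parameters(), lr=2e-7)
# Multiply the learning rate by 0.5 at each milestone.
scheduler = MultiStepLR(optimizer, milestones=[1, 5], gamma=0.5)

trainer = Trainer(
    model=model,
    args=training_args,
    optimizers=(optimizer, scheduler),
)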

Here is how I create them:

from transformers import Trainer, AdamW

class custom_optimizer(Trainer):
  def create_optimizer(self):
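    # NOTE: this builds a local optimizer but returns self.optimizer, which is never assigned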
    optimizer = AdamW(model.parameters(), lr=2e-7)
    return self.optimizer

from torch.optim.lr_scheduler import MultiStepLR
class custom_scheduler(Trainer):
  def create_scheduler(self, num_training_steps):
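    # NOTE: 'optimizer' is not defined in this scope; this line raises the NameError below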
    scheduler = MultiStepLR(optimizer, milestones=[1,5], gamma=0.5)
    return self.scheduler

And my trainer looks like this:

from transformers import Trainer, AdamW

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    optimizers=(custom_optimizer(model).create_optimizer(), custom_scheduler(model).create_scheduler(total_steps))
)

trainer.train()

But this is giving me an error:

NameError: name 'optimizer' is not defined

Can you please help me resolve this issue?

Just remove all those methods and subclasses and create your optimizer and scheduler objects in the right order. The NameError comes from create_scheduler referencing a name, optimizer, that was never defined in that scope.
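Concretely, something like this (a sketch reusing the names from your snippets; model, training_args, the datasets, compute_metrics, and EarlyStoppingCallback are assumed to be defined and imported as before):

from torch.optim import AdamW
from torch.optim.lr_scheduler import MultiStepLR
from transformers import Trainer

# Right order: create the optimizer first, then the scheduler that wraps it.
optimizer = AdamW(model.parameters(), lr=2e-7)
scheduler = MultiStepLR(optimizer, milestones=[1, 3], gamma=0.5)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    optimizers=(optimizer, scheduler),
)

trainer.train()

One caveat: the Trainer calls scheduler.step() once per optimizer step, not once per epoch, so MultiStepLR milestones are counted in training steps here.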

Okay, so this is what I have tried so far:

from transformers import TrainingArguments

training_args = TrainingArguments(
    "rugpt3-headers",
    num_train_epochs=3,
    per_device_train_batch_size=3,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

from transformers import Trainer, AdamW
from torch.optim.lr_scheduler import MultiStepLR

class MyTrainer(Trainer):
    def __init__(self, model=None, args=None, train_dataset=None, eval_dataset=None, compute_metrics=None, callbacks=None):
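      # NOTE: these arguments are passed positionally, but Trainer.__init__'s
      # third parameter is data_collator, so train_dataset lands in that slot;
      # that mismatch is what produces the TypeError below.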
      super().__init__(model, args, train_dataset, eval_dataset, compute_metrics, callbacks)
    
    def create_optimizer(self):
      self.optimizer = AdamW(model.parameters(), lr=2e-7)
    
    def create_scheduler(self, num_training_steps):
      self.lr_scheduler = MultiStepLR(self.optimizer, milestones=[1,3], gamma=0.5)

trainer = MyTrainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()

And the error I am getting is:

TypeError: 'Dataset' object is not callable

Is this the final solution?