Hi everyone. I’m using accelerate and I don’t know why my code terminates right away. Something is probably wrong with my setup, since I could train models with accelerate without any problems before. I’m not getting any errors—the code simply terminates as soon as I execute the notebook launcher cell. Here’s my code:
def dtr():
    """Fine-tune the causal language model for one epoch with ``Trainer``.

    Intended to be used as the worker function for
    ``accelerate.notebook_launcher``; it takes no arguments and returns
    nothing.  It reads two names from the enclosing (notebook) scope:

    * ``model_checkpoint`` -- passed to
      ``AutoModelForCausalLM.from_pretrained``.
    * ``lm_datasets`` -- indexed with ``"train"`` and ``"validation"`` to
      obtain the training and evaluation datasets.

    Checkpoints are written under ``models/S/100-10%`` every 242 steps and
    evaluation runs at the end of the epoch.
    """
    # Imported inside the function so they are resolved in each process
    # that runs this function.
    from transformers import AutoModelForCausalLM, Trainer, TrainingArguments

    model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

    training_args = TrainingArguments(
        output_dir="models/S/100-10%",
        # NOTE(review): recent transformers releases rename this argument to
        # `eval_strategy`; confirm against the installed version.
        evaluation_strategy="epoch",
        learning_rate=2e-5,
        weight_decay=0.01,
        push_to_hub=False,
        num_train_epochs=1,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        save_strategy="steps",
        save_steps=242,
    )

    # Trainer takes care of device placement and distributed wrapping on its
    # own, so it is used directly instead of being passed through
    # `Accelerator.prepare` (which is meant for models, optimizers,
    # dataloaders and schedulers, not for a Trainer instance).
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=lm_datasets["train"],
        eval_dataset=lm_datasets["validation"],
    )
    trainer.train()
from accelerate import notebook_launcher

# Positional arguments forwarded to `dtr`; it takes none, so this is empty.
args = ()

# Run `dtr` through the notebook launcher with num_processes=4.
notebook_launcher(dtr, args=args, num_processes=4)