I would like to know how I can save checkpoints when training a text classification model so that I can continue training from where it left off.
I’m having trouble configuring my code so that each checkpoint is saved with the files needed to resume training (such as “trainer_state.json”), allowing me to continue from the point where the previous run ended.
Here is my training code:
def executar_treinamento(self, base_treinada, resume_from_checkpoint=None):
    """Fine-tune ``self.model`` and write resumable checkpoints to ``self.output_dir``.

    Builds the ``TrainingArguments`` and the ``Trainer`` (stored on
    ``self.trainer``) and runs training.

    Args:
        base_treinada: Mapping that provides the ``'train'`` and
            ``'validation'`` splits (presumably already tokenized datasets;
            confirm against the caller).
        resume_from_checkpoint: Forwarded unchanged to ``Trainer.train``.
            ``None`` (default) starts a fresh run, ``True`` resumes from the
            most recent ``checkpoint-*`` folder inside ``self.output_dir``,
            and a string is taken as the path of the checkpoint folder to
            resume from.
    """
    training_args = TrainingArguments(
        output_dir=self.output_dir,
        # TrainingArguments has no `activation_checkpointing` argument;
        # `gradient_checkpointing` is the supported memory-saving switch.
        gradient_checkpointing=True,
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=8,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        # Save on the same cadence as evaluation so a checkpoint is written
        # every epoch, even when an epoch is shorter than the default
        # step-based save interval.
        save_strategy="epoch",
        # Keep optimizer, scheduler and RNG state in each checkpoint;
        # without them training cannot be resumed.
        save_only_model=False,
        load_best_model_at_end=False,
    )
    self.trainer = Trainer(
        model=self.model,
        args=training_args,
        compute_metrics=self.calcular_metricas,
        train_dataset=base_treinada['train'],
        eval_dataset=base_treinada['validation'],
        # Pass the tokenizer object (the one salvar_modelo also saves), not
        # the tokenizar_textos method: the Trainer calls save_pretrained()
        # on it whenever it writes a checkpoint.
        tokenizer=self.tokenizer,
    )
    self.trainer.train(resume_from_checkpoint=resume_from_checkpoint)
def avaliar_modelo(self, base_treinada):
    """Evaluate the trained model on the ``'test'`` split and return the result.

    Uses ``self.trainer``, which is assigned in ``executar_treinamento``.

    Args:
        base_treinada: Mapping that provides the ``'test'`` split.

    Returns:
        Whatever ``self.trainer.evaluate`` returns for the test split (for
        the Hugging Face ``Trainer`` this is a dict of metrics).
    """
    # Return the metrics instead of discarding them so callers can log them.
    return self.trainer.evaluate(base_treinada['test'])
def salvar_modelo(self, caminho):
    """Write the model and then the tokenizer to the directory ``caminho``.

    Args:
        caminho: Destination passed to each object's ``save_pretrained``.
    """
    # Attributes are looked up one at a time, in order, so the model is
    # saved before the tokenizer attribute is even accessed.
    for nome_atributo in ("model", "tokenizer"):
        getattr(self, nome_atributo).save_pretrained(caminho)