Hi. I am fine-tuning MarianMT for a translation task, and as soon as I added early stopping it crashes without even giving an error message:
from transformers import DataCollatorForSeq2Seq, MarianMTModel, MarianTokenizer, EarlyStoppingCallback, Seq2SeqTrainingArguments, Seq2SeqTrainer
from contracts_translation.src.finetunning.src.logger import CSVLoggerCallback
import evaluate
import warnings
warnings.filterwarnings("ignore")
class MarianFineTuner:
    def __init__(self, model_name: str, device: str, config: dict):
        self.model_name = model_name
        self.device = device
        self.config = config
        self.tokenizer = MarianTokenizer.from_pretrained(model_name)
        self.model = MarianMTModel.from_pretrained(model_name).to(device)
        self.data_collator = DataCollatorForSeq2Seq(tokenizer=self.tokenizer, model=self.model)
    def tokenize_dataset(self, dataset, source_col: str, target_col: str):
        def tokenize_function(examples):
            model_inputs = self.tokenizer(examples[source_col], truncation=True)
            with self.tokenizer.as_target_tokenizer():
                labels = self.tokenizer(examples[target_col], truncation=True)
            model_inputs["labels"] = labels["input_ids"]
            return model_inputs

        return dataset.map(tokenize_function, batched=True, remove_columns=[source_col, target_col])
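    # Side note: as_target_tokenizer() is deprecated on recent transformers
    # versions; as far as I understand, the equivalent tokenize_function using
    # the newer text_target argument would be:
    #
    #     def tokenize_function(examples):
    #         # text_target tokenizes the labels in the same call, so the
    #         # as_target_tokenizer() context manager is no longer needed
    #         return self.tokenizer(
    #             examples[source_col],
    #             text_target=examples[target_col],
    #             truncation=True,
    #         )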
    def compute_metrics(self, eval_preds):
        preds, labels = eval_preds
        decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True)
        # Strip whitespace before scoring
        decoded_preds = [pred.strip() for pred in decoded_preds]
        decoded_labels = [label.strip() for label in decoded_labels]
        bleu = evaluate.load("bleu")
        meteor = evaluate.load("meteor")
        bleu_result = bleu.compute(predictions=decoded_preds, references=[[l] for l in decoded_labels])
        meteor_result = meteor.compute(predictions=decoded_preds, references=decoded_labels)
        return {
            "bleu": round(bleu_result["bleu"], 4),
            "meteor": round(meteor_result["meteor"], 4),
        }
    def train(self, train_dataset, val_dataset, experiment_name):
        training_args = Seq2SeqTrainingArguments(
            output_dir=self.config["temp_output_dir"],
            per_device_train_batch_size=self.config["batch_size"],
            per_device_eval_batch_size=self.config["batch_size"],
            num_train_epochs=self.config["num_train_epochs"],
            eval_strategy="epoch",
            logging_strategy="epoch",
            save_strategy="epoch",
            save_total_limit=1,
            do_train=True,
            do_eval=True,
            report_to=[],
            load_best_model_at_end=True,
            metric_for_best_model="meteor",
            greater_is_better=True,
            predict_with_generate=True,
            torch_empty_cache_steps=2,
            eval_accumulation_steps=10,
        )
        log_path = f"results/epoch_logs/epoch_log_{experiment_name}.csv"
        trainer = Seq2SeqTrainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            tokenizer=self.tokenizer,
            data_collator=self.data_collator,
            callbacks=[
                CSVLoggerCallback(log_path, experiment_name),
                EarlyStoppingCallback(early_stopping_patience=2),
            ],
            compute_metrics=self.compute_metrics,
        )
        trainer.train()
        trainer.save_model(f"models/{experiment_name}")
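In case it matters, CSVLoggerCallback is just a thin TrainerCallback that appends each logged metrics dict to a CSV row; a simplified sketch of it (the real one lives in the logger module imported above):

import csv
import os
from transformers import TrainerCallback

class CSVLoggerCallback(TrainerCallback):
    # Simplified sketch: append every logged metrics dict as one CSV row.
    def __init__(self, log_path, experiment_name):
        self.log_path = log_path
        self.experiment_name = experiment_name
        os.makedirs(os.path.dirname(log_path), exist_ok=True)

    def on_log(self, args, state, control, logs=None, **kwargs):
        if not logs:
            return
        row = {"experiment": self.experiment_name, "step": state.global_step, **logs}
        new_file = not os.path.exists(self.log_path)
        with open(self.log_path, "a", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(row))
            if new_file:
                writer.writeheader()
            writer.writerow(row)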
This is how I am instantiating my model:
import shutil
from pathlib import Path

import torch
import yaml

# args (an argparse namespace) and load_datasets are defined elsewhere in this script
def main():
    with open(f"config/{args.experiment_name}.yaml") as f:
        config = yaml.safe_load(f)
    cols = ["MARKETING_REFERENCE_fr", "MARKETING_REFERENCE_root", config["source_column"], config["target_column"]]
    train_ds, val_ds = load_datasets(
        config["data"]["train_path"], config["data"]["val_path"], cols
    )
    torch.cuda.empty_cache()
    output_dir = Path("models/temp_output")
    output_dir.mkdir(parents=True, exist_ok=True)
    config["temp_output_dir"] = str(output_dir)
    trainer = MarianFineTuner(model_name=config["model_name"], device=config["device"], config=config)
    train_ds = trainer.tokenize_dataset(train_ds, config["source_column"], config["target_column"])
    val_ds = trainer.tokenize_dataset(val_ds, config["source_column"], config["target_column"])
    Path("results").mkdir(parents=True, exist_ok=True)
    trainer.train(train_ds, val_ds, experiment_name=args.experiment_name)
    shutil.rmtree(output_dir)
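For reference, the experiment YAML is nothing exotic; it looks roughly like this (the paths and column names here are placeholders, not my real ones):

model_name: Helsinki-NLP/opus-mt-fr-en
device: cuda
batch_size: 16
num_train_epochs: 10
source_column: source_text
target_column: target_text
data:
  train_path: data/train.csv
  val_path: data/val.csv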
Do you know why this would happen?