I’m trying to capture autologged parameters with MLflow. I’m using the MLflowCallback class from transformers.integrations, which interfaces with the transformers.TrainerCallback class.
Here’s the relevant code that tells MLflow to create an experiment, sends runs to a hosted tracking server, and tells transformers what type of logging to do (as defined by the MLflowCallback class).
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaConfig, Trainer, TrainingArguments
import mlflow
from transformers import TrainerCallback
from transformers.integrations import MLflowCallback

# Point the MLflow client at the remote tracking server.
remote_server_uri = "[PRIVATE SERVER URL]"  # redacted; must be a string URI
mlflow.set_tracking_uri(remote_server_uri)

# After loading and tokenizing data, here we run the training experiment.
experiment_name = "ht_vp_roberta_randomSearch"
mlflow.set_experiment(experiment_name)  # server creates experiment folder at this point

with mlflow.start_run():
    training_args = TrainingArguments(
        output_dir=experiment_name,
        evaluation_strategy='epoch',
        eval_steps=500,
        gradient_accumulation_steps=1000,
        eval_accumulation_steps=1,
    )
    model = RobertaForSequenceClassification.from_pretrained("roberta-base")
    trainer = Trainer(
        args=training_args,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        model=model,
        compute_metrics=hf.compute_metrics,
        # Trainer expects a *list* of callbacks (classes or instances):
        # internally it does `default_callbacks + callbacks`, so passing the
        # bare class raised "can only concatenate list (not 'type') to list".
        # NOTE: when mlflow is installed, MLflowCallback is already added by
        # default, so listing it here is optional (and may double-register).
        callbacks=[MLflowCallback],
    )
    trainer.train()
    trainer.evaluate()
I’m getting the below error:
Traceback (most recent call last):
File "mlflow_test_simple.py", line 80, in <module>
trainer = Trainer(
File "/home/jovyan/conda/dsEnv/lib/python3.8/site-packages/transformers/trainer.py", line 385, in __init__
callbacks = default_callbacks if callbacks is None else default_callbacks + callbacks
TypeError: can only concatenate list (not "type") to list