Hello there,
I am trying to apply instruction tuning to a LLaMA-based model and I am running into two problems:
- When I set predict_with_generate = True, training crashes as soon as the first training step runs:
Traceback (most recent call last):
File "/dss/dsshome1/02/ra95kix2/seminar_fma/growth-vs-forgetting/src/utils/finetune.py", line 730, in <module>
train()
File "/dss/dsshome1/02/ra95kix2/seminar_fma/growth-vs-forgetting/src/utils/finetune.py", line 690, in train
train_result = trainer.train()
^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer.py", line 1539, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer.py", line 1809, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer.py", line 2654, in training_step
loss = self.compute_loss(model, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer.py", line 2692, in compute_loss
raise ValueError(
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask,labels.
- When I just want to run prediction (do_predict), it fails with:
Traceback (most recent call last):
File "/dss/dsshome1/02/ra95kix2/seminar_fma/growth-vs-forgetting/src/utils/finetune_v2.py", line 716, in <module>
train()
File "/dss/dsshome1/02/ra95kix2/seminar_fma/growth-vs-forgetting/src/utils/finetune_v2.py", line 691, in train
prediction_output = trainer.predict(
^^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 216, in predict
return super().predict(test_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer.py", line 3010, in predict
output = eval_loop(
^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer.py", line 3123, in evaluation_loop
loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 305, in prediction_step
loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach()
~~~~~~~^^^^^^^^
File "/dss/dsshome1/02/ra95kix2/miniconda3/envs/clearning/lib/python3.11/site-packages/transformers/utils/generic.py", line 318, in __getitem__
return inner_dict[k]
~~~~~~~~~~^^^
KeyError: 'loss'
I don’t understand why the trainer tries to compute a loss at all when I set predict_with_generate. During training the loss is of course required, since I am tuning on a dataset, but during the evaluation steps (I run one evaluation after each epoch) I would mainly like to see generation metrics such as BLEU or SARI. For prediction (I assume I can use my test data here), I only want to assess inference performance, so all I need is the model's generated output, not a loss.
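For context, the compute_metrics function I pass to the trainer follows roughly this pattern (a simplified sketch rather than my exact code; loading sacrebleu through the evaluate library is just an example of the kind of metric I want):

import numpy as np
import evaluate

def compute_metrics(eval_preds, eval_labels, tokenizer, task):
    # Replace the -100 padding used for the loss with the pad token before decoding.
    preds = np.where(eval_preds != -100, eval_preds, tokenizer.pad_token_id)
    labels = np.where(eval_labels != -100, eval_labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # BLEU shown as an example; which metric is computed depends on `task` (e.g. SARI instead).
    bleu = evaluate.load("sacrebleu")
    result = bleu.compute(
        predictions=decoded_preds,
        references=[[label] for label in decoded_labels],
    )
    return {"bleu": result["score"]}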
Here is the relevant part of my script:
model, tokenizer = get_accelerate_model(args, checkpoint_dir)
model.config.use_cache = False
print("Loaded model")
set_seed(args.seed)

data_module = make_data_module(tokenizer=tokenizer, args=args)
task = args.task

trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    **{k: v for k, v in data_module.items() if k != "predict_dataset"},
    compute_metrics=lambda p: compute_metrics(
        eval_preds=p.predictions,
        eval_labels=p.label_ids,
        tokenizer=tokenizer,
        task=task,  # Dynamically fetched from the config
    ),
)

# Verifying the datatypes and parameter counts before training.
print_trainable_parameters(args, model)
dtypes = {}
for _, p in model.named_parameters():
    dtype = p.dtype
    if dtype not in dtypes:
        dtypes[dtype] = 0
    dtypes[dtype] += p.numel()
total = 0
for k, v in dtypes.items():
    total += v
for k, v in dtypes.items():
    print(k, v, v / total)

all_metrics = {"run_name": args.run_name}

# Training
if args.do_train:
    logger.info("*** Train ***")
    print('are we in train?')
    train_result = trainer.train()
    metrics = train_result.metrics
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    all_metrics.update(metrics)

# Evaluation
if args.do_eval:
    logger.info("*** Evaluate ***")
    print('are we in evaluate?')
    metrics = trainer.evaluate(metric_key_prefix="eval")
    trainer.log_metrics("eval", metrics)
    trainer.save_metrics("eval", metrics)
    all_metrics.update(metrics)

# Prediction
if args.do_predict:
    logger.info("*** Predict ***")
    if 'labels' not in data_module['predict_dataset'].column_names:
        logger.warning("No 'labels' column found in prediction dataset. Metrics like BLEU may not work.")
    prediction_output = trainer.predict(
        test_dataset=data_module['predict_dataset'],
        metric_key_prefix="predict",
        prediction_loss_only=False,
    )
    prediction_metrics = prediction_output.metrics
    predictions = prediction_output.predictions
    predictions = np.argmax(predictions, axis=-1)
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    predictions = tokenizer.batch_decode(
        predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )
    with open(os.path.join(args.output_dir, 'predictions.jsonl'), 'w') as fout:
        for i, example in enumerate(data_module['predict_dataset']):
            example['prediction_with_input'] = predictions[i].strip()
            example['prediction'] = predictions[i].replace(example['input'], '').strip()
            fout.write(json.dumps(example) + '\n')
    print(prediction_metrics)
    trainer.log_metrics("predict", prediction_metrics)
    trainer.save_metrics("predict", prediction_metrics)
    all_metrics.update(prediction_metrics)

if args.do_train or args.do_eval or args.do_predict:
    with open(os.path.join(args.output_dir, "metrics.json"), "w") as fout:
        fout.write(json.dumps(all_metrics))
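For completeness, the Seq2SeqTrainingArguments are built roughly like this (a simplified sketch; batch size, epoch count and generation length are placeholders, but predict_with_generate and the per-epoch evaluation are set as shown):

from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir=args.output_dir,
    do_train=args.do_train,
    do_eval=args.do_eval,
    do_predict=args.do_predict,
    evaluation_strategy="epoch",     # one evaluation after each epoch
    predict_with_generate=True,      # the flag that triggers the first error
    generation_max_length=512,       # placeholder value
    per_device_train_batch_size=8,   # placeholder value
    num_train_epochs=3,              # placeholder value
    seed=args.seed,
    run_name=args.run_name,
)

Any pointers on why the trainer still goes through the loss path with this configuration would be appreciated.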