I write the code like this
import logging
from datasets import load_from_disk
from transformers import RobertaTokenizer, EncoderDecoderModel, Trainer, TrainingArguments
from rouge import Rouge
logging.basicConfig(level=logging.INFO)
model = EncoderDecoderModel.from_encoder_decoder_pretrained("roberta-base",'roberta-base',tie_encoder_decoder=True)
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
print("begin")
# load train and validation data
train_dataset = load_from_disk("train_dataset")
val_dataset = load_from_disk('val_dataset')
print("done")
print('begin')
# load rouge for validation
rouge = Rouge()
print('done')
# set decoding params
model.config.decoder_start_token_id = tokenizer.bos_token_id
model.config.eos_token_id = tokenizer.eos_token_id
model.config.max_length = 142
model.config.min_length = 56
model.config.no_repeat_ngram_size = 3
model.early_stopping = True
model.length_penalty = 2.0
model.num_beams = 4
encoder_length = 512
decoder_length = 128
batch_size = 16
def compute_metrics(pred):
labels_ids = pred.label_ids
pred_ids = pred.predictions
# all unnecessary tokens are removed
pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
labels_ids[labels_ids == -100] = tokenizer.eos_token_id
label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)
rouge_output = rouge.compute(predictions=pred_str, references=label_str, rouge_types=["rouge2"])["rouge2"].mid
return {
"rouge2_precision": round(rouge_output.precision, 4),
"rouge2_recall": round(rouge_output.recall, 4),
"rouge2_fmeasure": round(rouge_output.fmeasure, 4),
}
# set training arguments - these params are not really tuned, feel free to change
training_args = TrainingArguments(
output_dir="robertashare",
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
evaluate_during_training=True,
do_train=True,
do_eval=True,
logging_steps=1000,
save_steps=1000,
eval_steps=1000,
overwrite_output_dir=True,
warmup_steps=2000,
save_total_limit=1,
fp16=True,
num_train_epochs=30,
eval_accumulation_steps=1,
gradient_accumulation_steps=8
)
# instantiate trainer
trainer = Trainer(
model=model,
args=training_args,
compute_metrics=compute_metrics,
train_dataset=train_dataset,
eval_dataset=val_dataset,
)
print('begin to train')
# start training
trainer.train()
after training 1000 steps ,I begin to eval it , the following error happened
File “train_roberta.py”, line 85, in
trainer.train()
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/trainer.py”, line 786, in train
self._maybe_log_save_evalute(tr_loss, model, trial, epoch)
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/trainer.py”, line 843, in _maybe_log_save_evalute
metrics = self.evaluate()
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/trainer.py”, line 1251, in evaluate
output = self.prediction_loop(eval_dataloader, description=“Evaluation”)
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/trainer.py”, line 1381, in prediction_loop
metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
File “train_roberta.py”, line 41, in compute_metrics
pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/tokenization_utils_base.py”, line 2886, in batch_decode
for seq in sequences
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/tokenization_utils_base.py”, line 2886, in
for seq in sequences
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/tokenization_utils.py”, line 777, in decode
filtered_tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
File “/home/LAB/maoqr/miniconda3/envs/py36/lib/python3.6/site-packages/transformers/tokenization_utils.py”, line 723, in convert_ids_to_tokens
index = int(index)
TypeError: only size-1 arrays can be converted to Python scalars
How can I fix this ?