Hi, I have seen this issue posted multiple times, and the answer under every post was to use eval_accumulation_steps. I have set eval_accumulation_steps to 1, and I still get an OOM error during validation.
Here is the code:
import numpy as np
import datasets
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
args = Seq2SeqTrainingArguments(
    output_dir="./mymodel",
    group_by_length=True,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=4,
    evaluation_strategy="steps",
    num_train_epochs=5,
    fp16=True,
    save_steps=10000,
    eval_steps=5000,
    logging_steps=5000,
    learning_rate=3e-4,
    save_total_limit=1,
    predict_with_generate=True,
    eval_accumulation_steps=1,
)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
metric = datasets.load_metric("sacrebleu")
def postprocess_text(preds, labels):
    preds = [pred.strip() for pred in preds]
    # Each reference must be wrapped in a list, otherwise metric.compute raises:
    # ValueError: Got a string but expected a list instead: 'Norveç'in rakfisk'i: Dünyanın en kokulu bal bu mu?'
    labels = [[label.strip()] for label in labels]
    return preds, labels
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    # Some simple post-processing
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)
    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}
    return result
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
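For context, tokenizer, model, and tokenized_datasets are created earlier in the script, roughly along these lines (the checkpoint name below is just a placeholder, not the actual model I use):

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Placeholder checkpoint name; the real script loads a different seq2seq model.
checkpoint = "some-seq2seq-checkpoint"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# tokenized_datasets is a DatasetDict with "train" and "test" splits,
# already mapped through the tokenizer (input_ids and labels columns).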
What should I do?