KeyError: 'loss' when changing the backbone in OpenDelta

I was working through the OpenDelta demo, which fine-tunes T5 on the trivia_qa dataset. When I simply swap the T5 backbone for RoBERTa, training fails with KeyError: 'loss'.
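One thing I noticed while debugging (my own check, not something from the demo): a bare RobertaModel is a headless encoder, so its forward output does not seem to contain any loss field for the Trainer to read:

from transformers import RobertaTokenizer, RobertaModel

tok = RobertaTokenizer.from_pretrained("roberta-base")
enc = RobertaModel.from_pretrained("roberta-base")
out = enc(**tok("hello world", return_tensors="pt"))
print(out.keys())  # no 'loss' here, only last_hidden_state / pooler_output

I am not sure whether this is the actual cause, though.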

My model code is as follows:

from dataclasses import dataclass, field
from typing import Optional

from transformers import (
    RobertaConfig,
    RobertaModel,
    RobertaTokenizer,
    Seq2SeqTrainingArguments,
    TrainerCallback,
)
from datasets import load_dataset
import torch
import numpy as np
import random

@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
    """
    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizers (backed by the tokenizers library) or not."},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script "
            "with private models)."
        },
    )

model_args = ModelArguments(model_name_or_path="roberta-base")

config = RobertaConfig.from_pretrained(
    model_args.config_name if model_args.config_name else model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
    revision=model_args.model_revision,
    use_auth_token=True if model_args.use_auth_token else None,
)
# Note: `dropout_rate` is a T5 config attribute; RoBERTa uses these names instead.
config.hidden_dropout_prob = 0.0
config.attention_probs_dropout_prob = 0.0
tokenizer = RobertaTokenizer.from_pretrained(
    model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
    use_fast=model_args.use_fast_tokenizer,
    revision=model_args.model_revision,
    use_auth_token=True if model_args.use_auth_token else None,
)
model = RobertaModel.from_pretrained(
    model_args.model_name_or_path,
    from_tf=bool(".ckpt" in model_args.model_name_or_path),
    config=config,
    cache_dir=model_args.cache_dir,
    revision=model_args.model_revision,
    use_auth_token=True if model_args.use_auth_token else None,
)
model.resize_token_embeddings(len(tokenizer))

My dataset and training code is as follows:

@dataclass
class TrainingArguments(Seq2SeqTrainingArguments):
    print_num_parameters: Optional[bool] = field(default=False, metadata={"help": "If set, print the parameters of "
                                                                                  "the model."})
    do_test: Optional[bool] = field(default=False, metadata={"help": "If set, evaluates the test performance."})
    split_validation_test: Optional[bool] = field(default=False,
                                                  metadata={"help": "If set, for the datasets which do not "
                                                                    "have a test set, we use the validation set as the "
                                                                    "test set and build a validation set by either "
                                                                    "splitting the validation set in half (for datasets "
                                                                    "smaller than 10K samples) or by using 1K examples "
                                                                    "from the training set as the validation set (for "
                                                                    "larger datasets)."})
    compute_time: Optional[bool] = field(default=False, metadata={"help": "If set, measures the time."})
    compute_memory: Optional[bool] = field(default=False, metadata={"help": "If set, measures the memory."})


training_args = TrainingArguments(output_dir="./", 
                                  do_train=True,
                                  do_eval=True,
                                  do_predict=False,
                                  evaluation_strategy="steps",
                                  eval_steps=200,
                                  save_strategy="steps",
                                  save_steps=200,
                                  greater_is_better=True,
                                  load_best_model_at_end=True,
                                  compute_memory=True,
                                  predict_with_generate=True,
                                  push_to_hub=False,
                                  learning_rate=1e-3,
                                  seed=42,
                                  per_device_eval_batch_size=32,
                                  per_device_train_batch_size=32,
                                  num_train_epochs=1,
                                  metric_for_best_model="em",
                                  warmup_steps=0,
                                  save_total_limit=1,
                                  gradient_accumulation_steps=1
                                  )

def compute_metrics(eval_preds):
    # Exact match (EM) between decoded predictions and decoded labels.
    preds, labels = eval_preds
    decoded_preds = [tokenizer.decode(i, skip_special_tokens=True, clean_up_tokenization_spaces=True) for i in preds]
    decoded_labels = [tokenizer.decode(i, skip_special_tokens=True, clean_up_tokenization_spaces=True) for i in labels]
    result_list = [int(i == j) for i, j in zip(decoded_labels, decoded_preds)]
    result = {"em": sum(result_list) / len(result_list)}
    print(result)
    return result

mydataset = load_dataset("trivia_qa", "unfiltered.nocontext")
# Keep only 500 randomly chosen validation examples to speed up evaluation.
validation_index = np.arange(len(mydataset['validation']))
np.random.shuffle(validation_index)
mydataset['validation'] = mydataset['validation'].select(validation_index[:500])
def misspelling(x):
    """Randomly corrupt a sentence with up to two edits:
    word swap, word drop, character replacement, or character swap."""
    replace_time = np.random.randint(3)  # 0, 1 or 2 corruptions
    count = 0

    while count < replace_time:
        randfloat = np.random.rand()
        if randfloat < 0.15:
            # Swap two randomly chosen words.
            x = x.split()
            switch_index = [np.random.randint(low=0, high=len(x)) for i in range(2)]
            x[switch_index[0]], x[switch_index[1]] = x[switch_index[1]], x[switch_index[0]]
            x = " ".join(x)
        elif randfloat < 0.3:
            # Drop a randomly chosen word.
            x = x.split()
            drop_index = np.random.randint(low=0, high=len(x))
            x = x[:drop_index] + x[drop_index + 1:]
            x = " ".join(x)
        elif randfloat < 0.8:
            # Replace one character with one or two random characters.
            replace_str = "".join([random.choice('abcdefghijklmnopqrstuvwxyz!@#$%^&*()') for i in range(np.random.randint(1, 3))])
            rindx = np.random.randint(low=0, high=len(x))  # use the current length, not the original one
            x = x[:rindx] + replace_str + x[rindx + 1:]
        else:
            # Swap two randomly chosen characters.
            x = list(x)
            switch_index = [np.random.randint(low=0, high=len(x)) for i in range(2)]
            x[switch_index[0]], x[switch_index[1]] = x[switch_index[1]], x[switch_index[0]]
            x = "".join(x)
        count += 1
    return x

def tokenize_function(examples):
    # Clean each question, cap it at 20 words, and make sure it ends with "?".
    input_sentences = [" ".join((i.strip("\n").strip().strip("?") + "?").split()[:20]) for i in examples["question"]]
    # Inputs are corrupted questions; labels are the clean originals.
    mis_spellings = [misspelling(x) for x in input_sentences]
    input_ids = [tokenizer.encode(i, padding="max_length", truncation=True, max_length=64) for i in mis_spellings]
    labels = [tokenizer.encode(i, padding="max_length", truncation=True, max_length=64) for i in input_sentences]
    return {"input_ids": input_ids, "labels": labels}

tokenized_datasets = mydataset.map(tokenize_function, remove_columns=['answer', 'question_source',"entity_pages",'search_results'], batched=True)
class MyCallback(TrainerCallback):
    def on_evaluate(self, args, state, control, **kwargs):
        """
        Event called after an evaluation phase.
        """
        sents = ["was Wher Newton bon?", 
                 "In year which Beiiing Olmpic ld?"
                ]
        for sent in sents:
            input_ids = tokenizer(sent, return_tensors="pt").input_ids.cuda()
            answers_ids = model.generate(input_ids=input_ids,
                                         max_length=20,
                                         num_beams=4,
                                         )
            print("{} {}".format(sent, tokenizer.decode(answers_ids[0], skip_special_tokens=True)))
        print("max allocated memory {} GB".format(torch.cuda.max_memory_allocated("cuda:0") / 1024**3))
        
from transformers import Seq2SeqTrainer
training_args.output_dir = "./SpellingCorrection" # to avoid conflict
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    callbacks=[MyCallback],
    compute_metrics=compute_metrics,
)

trainer.train()

Running trainer.train() then raises KeyError: 'loss'. Thanks very much!
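My current guess (unverified) is that RobertaModel has no head, so it never computes a loss for the Trainer to return, and that I need a model that both computes a seq2seq loss from `labels` and supports generate(), like T5 does. Would wrapping RoBERTa in an EncoderDecoderModel be the right direction? A sketch of what I mean (the token-id settings are my assumption of what such a model needs):

from transformers import EncoderDecoderModel

# Untested idea: tie two RoBERTas together as encoder + decoder so the
# model returns a loss when `labels` are passed and supports generate().
model = EncoderDecoderModel.from_encoder_decoder_pretrained("roberta-base", "roberta-base")
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.pad_token_id = tokenizer.pad_token_id

Or is there a recommended way to do this within OpenDelta?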