Hello!
I have pushed my fine-tuned conversation model onto the hub and the hosted inference API is returning this error:
```
It's impossible to use `encoder_no_repeat_ngram_size` with decoder-only architecture
```
Here is the code:
```python
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
import evaluate
import numpy as np

raw_datasets = load_dataset("IlyaGusev/gpt_roleplay_realm", revision="main", cache_dir=r"E:\HuggingFace")
dia_sets = raw_datasets["en"]

checkpoint = "facebook/blenderbot-400M-distill"
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = AutoModelForCausalLM.from_pretrained(checkpoint)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

def tokenize_function(dia_sets):
    # flatten every utterance of every dialogue into one list of strings
    # raw_datasets = [utterance for turns in raw_datasets["turns"] for turn in turns for utterance in turn["utterances"]]
    dia_sets = [actuals['content'] for turns in dia_sets["dialogues"] for ac in turns for actuals in ac["chat"]]
    return tokenizer(dia_sets, padding=True, truncation=True, return_overflowing_tokens=True)

tokenized_datasets = dia_sets.map(tokenize_function, remove_columns=raw_datasets["en"].column_names, batched=True)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    "test_trainer",
    save_strategy="epoch",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    optim="adafactor",
    eval_accumulation_steps=1,
    num_train_epochs=3,
)

def compute_metrics(eval_preds):
    metric = evaluate.load("IlyaGusev/gpt_roleplay_realm", cache_dir=r"E:\HuggingFace")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets,
    # eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()

path = r".env\test_bot"
trainer.save_model(path)

model = AutoModelForCausalLM.from_pretrained(path)
tokenizer = AutoTokenizer.from_pretrained(path)
model.push_to_hub("test_bot")
tokenizer.push_to_hub("test_bot")
```
Is there anything I need to tweak so that it stops trying to use an encoder inside the model when I have it generate a dialogue? Also, the Inference API labelled the model as text generation instead of conversational on the model card.
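For context, this is the kind of dialogue generation I'm aiming for once the model is loaded back from the Hub (just a sketch: the prompt text is made up, and it assumes the same `AutoModelForCausalLM`/`AutoTokenizer` loading as above):

```python
# Sketch of the dialogue generation I'm aiming for; the prompt is a placeholder.
inputs = tokenizer("Hello there, how are you today?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=60)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```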
Edit: I have tried deleting the `"encoder_no_repeat_ngram_size": 3` entry before pushing to the Hub, but the error still occurred.
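Roughly what I tried (a sketch of the idea, assuming the setting lives in the saved checkpoint's model/generation config, which is where I found the value 3):

```python
# Sketch of what I tried: clear the flag in the saved checkpoint's configs
# and push again (assuming this is where the Inference API picks it up from).
model = AutoModelForCausalLM.from_pretrained(path)
model.config.encoder_no_repeat_ngram_size = None
model.generation_config.encoder_no_repeat_ngram_size = None
model.push_to_hub("test_bot")
```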