I’m trying to prompt-tune gemma2-it using this code:
from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit
from trl import SFTConfig, SFTTrainer

tuning_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.RANDOM,
    num_virtual_tokens=2,
    tokenizer_name_or_path=TOKENIZER_ID,
)

peft_model = get_peft_model(model, tuning_config)
training_args = SFTConfig(
    output_dir="./gemma_nn_1b_freeze",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    max_seq_length=2503,
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    logging_steps=1,
    save_steps=200,
    save_strategy="steps",
    bf16=True,
    fp16=False,
    max_grad_norm=0.3,
    gradient_checkpointing=True,
    packing=True,
    report_to="none",
    disable_tqdm=False,
    dataset_kwargs={
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False,  # No need to add an additional separator token
    },
)
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=train_sample,
    args=training_args,
    peft_config=tuning_config,
)
trainer.train()
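For reference, I also sanity-checked the PEFT wrapping before training (just a quick check, not part of the run above); as far as I understand prompt tuning, only the learned virtual-token embedding should be trainable:

# Sanity check on the PEFT wrapping (my understanding: only the prompt
# embedding of shape (num_virtual_tokens, hidden_size) is trainable,
# i.e. 2 * hidden_size parameters with num_virtual_tokens=2).
peft_model.print_trainable_parameters()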
But after training finishes, when I try to use the model for generation I get this error:
def get_outputs(model, inputs, max_new_tokens=256):
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5,
        early_stopping=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    return outputs
trained_model = trainer.model
input_prompt = tokenizer("I want you to act as a motivational coach. ", return_tensors="pt")
loaded_model_sentences_outputs = get_outputs(trained_model, input_prompt)
print(tokenizer.batch_decode(loaded_model_sentences_outputs, skip_special_tokens=True))
Error:
1 def get_outputs(model, inputs, max_new_tokens=256):
----> 2 outputs = model.generate(
3 input_ids=inputs["input_ids"],
4 attention_mask=inputs["attention_mask"],
5 max_new_tokens=max_new_tokens,
6 repetition_penalty=1.5,
7 early_stopping=True,
8 eos_token_id=tokenizer.eos_token_id,
9 )
10 return outputs
File c:\Users\ALI\AppData\Local\Programs\Python\Python310\lib\site-packages\peft\peft_model.py:1640, in PeftModelForCausalLM.generate(self, *args, **kwargs)
1638 outputs = self.base_model.generate(*args, **kwargs)
1639 else:
-> 1640 outputs = self.base_model.generate(**kwargs)
1641 except:
...
1682 )
1684 if model_kwargs.get("position_ids", None) is not None:
1685 warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
RuntimeError: Tensors must have same number of dimensions: got 2 and 4
My tokenizer is philschmid/gemma-tokenizer-chatml, but I also tried the default Gemma 2 tokenizer. I think the problem comes from the extra virtual tokens that the prompt-tuned model prepends to the input, but I don't understand how that mechanism works or how to fix the error. Any help is appreciated.
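To clarify what I mean by "extra virtual tokens": my (possibly incorrect) understanding is that the PEFT model prepends the learned prompt embeddings in front of the input embeddings at generation time, which can be inspected like this:

# My understanding (may be wrong): PEFT prepends num_virtual_tokens learned
# embeddings before the input embeddings when calling the base model.
virtual_prompt = trained_model.get_prompt(batch_size=1)
print(virtual_prompt.shape)  # I expect (1, 2, hidden_size) for num_virtual_tokens=2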