Dimension Error After Prompt-tuning the Gemma2 model

I’m trying to prompt-tune gemma2-it using this code:

from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit
from trl import SFTConfig, SFTTrainer

tuning_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.RANDOM,  
    num_virtual_tokens=2, 
    tokenizer_name_or_path=TOKENIZER_ID
)

peft_model = get_peft_model(model, tuning_config)

training_args = SFTConfig(
    output_dir="./gemma_nn_1b_freeze",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    max_seq_length=2503,
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    logging_steps=1,
    save_steps=200,
    save_strategy="steps",
    bf16=True,
    fp16=False,
    max_grad_norm=0.3,
    gradient_checkpointing=True,
    packing=True,
    report_to="none",
    disable_tqdm=False,
    dataset_kwargs={
        "add_special_tokens": False,   # we template with special tokens ourselves
        "append_concat_token": False,  # no need to add an additional separator token
    },
)

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=train_sample,
    args=training_args,
    peft_config=tuning_config,
)

trainer.train()

But after training finishes, when I try to use the model like this, I get an error:

def get_outputs(model, inputs, max_new_tokens=256):
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5,  
        early_stopping=True,  
        eos_token_id=tokenizer.eos_token_id,
    )
    return outputs

trained_model = trainer.model

input_prompt = tokenizer("I want you to act as a motivational coach. ", return_tensors="pt")

loaded_model_sentences_outputs = get_outputs(trained_model, input_prompt)
print(tokenizer.batch_decode(loaded_model_sentences_outputs, skip_special_tokens=True))

Error:

      1 def get_outputs(model, inputs, max_new_tokens=256):
----> 2     outputs = model.generate(
      3         input_ids=inputs["input_ids"],
      4         attention_mask=inputs["attention_mask"],
      5         max_new_tokens=max_new_tokens,
      6         repetition_penalty=1.5,  
      7         early_stopping=True,  
      8         eos_token_id=tokenizer.eos_token_id,
      9     )
     10     return outputs

File c:\Users\ALI\AppData\Local\Programs\Python\Python310\lib\site-packages\peft\peft_model.py:1640, in PeftModelForCausalLM.generate(self, *args, **kwargs)
   1638             outputs = self.base_model.generate(*args, **kwargs)
   1639     else:
-> 1640         outputs = self.base_model.generate(**kwargs)
   1641 except:
...
   1682     )
   1684 if model_kwargs.get("position_ids", None) is not None:
   1685     warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")

RuntimeError: Tensors must have same number of dimensions: got 2 and 4

My tokenizer is philschmid/gemma-tokenizer-chatml, but I also tried the default Gemma2 tokenizer. I think the problem comes from the extra virtual tokens that the prompt-tuned model prepends, but I have no idea how that mechanism works or how to fix the error. Help is appreciated.


Hi there!
The error you’re encountering (RuntimeError: Tensors must have same number of dimensions: got 2 and 4) is most likely a mismatch in tensor dimensions when the prompt-tuned model is used for generation. Prompt tuning prepends virtual tokens to the input, and those tokens also have to be accounted for in the attention mask at inference time.
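Concretely, PEFT's prompt-tuning wrapper prepends a block of ones (one per virtual token) to the 2-D attention mask coming from the tokenizer before it calls the base model's generate(). If the mask it receives at that point is already a 4-D causal mask, the concatenation fails with exactly the error you're seeing. Here is a minimal illustration of the shape clash (plain torch with made-up shapes, not PEFT's actual code):

import torch

batch_size, seq_len, num_virtual_tokens = 1, 10, 2

# The 2-D (batch, seq_len) mask the tokenizer returns can be extended:
attention_mask = torch.ones(batch_size, seq_len)
prefix_mask = torch.ones(batch_size, num_virtual_tokens)
print(torch.cat((prefix_mask, attention_mask), dim=1).shape)  # torch.Size([1, 12])

# A 4-D (batch, heads, q_len, kv_len) causal mask cannot be joined with the 2-D prefix:
attention_mask_4d = torch.ones(batch_size, 1, seq_len, seq_len)
# torch.cat((prefix_mask, attention_mask_4d), dim=1)
# -> RuntimeError: Tensors must have same number of dimensions: got 2 and 4

You could try calling prepare_inputs_for_generation yourself before generate() to check whether the virtual-token prefix is added to your inputs cleanly: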

def get_outputs(model, inputs, max_new_tokens=256):
    # Ensure the model is in evaluation mode
    model.eval()

    # Ask the PEFT wrapper to prepend the virtual tokens / prefix attention mask
    inputs = model.prepare_inputs_for_generation(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )

    # Generate outputs
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5,
        early_stopping=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    return outputs

# Load the trained model
trained_model = trainer.model

# Prepare the input prompt
input_prompt = tokenizer("I want you to act as a motivational coach. ", return_tensors="pt")

# Get the outputs
loaded_model_sentences_outputs = get_outputs(trained_model, input_prompt)
print(tokenizer.batch_decode(loaded_model_sentences_outputs, skip_special_tokens=True))

Thanks, but I’m still getting the same error.

It seems that this part is the problem:

File c:\Users\ALI\AppData\Local\Programs\Python\Python310\lib\site-packages\peft\peft_model.py:1680, in PeftModelForCausalLM.prepare_inputs_for_generation(self, task_ids, *args, **kwargs)
   1678     size = model_kwargs["input_ids"].shape[0], peft_config.num_virtual_tokens
   1679     prefix_attention_mask = torch.ones(size).to(model_kwargs["input_ids"].device)
-> 1680     model_kwargs["attention_mask"] = torch.cat(
   1681         (prefix_attention_mask, model_kwargs["attention_mask"]), dim=1
   1682     )
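For what it’s worth, the tensors I pass in are still 2-D, so the 4-D mask apparently gets built somewhere inside generate() before this concatenation runs (the exact sequence length below is just from my prompt):

print(input_prompt["input_ids"].shape)       # 2-D, e.g. torch.Size([1, 11])
print(input_prompt["attention_mask"].shape)  # 2-D, e.g. torch.Size([1, 11])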