Llama 2 fine-tuned on completions only repeats the prompt during inference

I'm trying to fine-tune Llama 2 on completions only. For some reason, at inference time, the fine-tuned model's generations all start by repeating the exact prompt text instead of containing just the completion. What might be causing this?

Demos of my training and inference code are attached. Here is what a prompt/output pair looks like at inference:

{"prompt": "## original_lyrics ## every time you pop up you should see my fuckin eye roll ## cleaned_lyrics ##",
"generated_text": "## original_lyrics ## every time you pop up you should see my fuckin eye roll ## cleaned_lyrics ##\n\u0409 every time you pop up you should see my eye roll\nEvery time you pop up you should see my eye roll\nEvery time you pop up you should see my eye"}

# Imports used by the training and inference demos
import json
from datasets import Dataset, load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from trl import DataCollatorForCompletionOnlyLM, SFTTrainer

# Tokenizer
tokenizer= AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True, token= huggingface_token)
tokenizer.add_bos_token= True
tokenizer.add_eos_token= True
tokenizer.pad_token = "<pad>"
tokenizer.padding_side= "left"

# Modify dataset to suit completions only training
completion_template= "## cleaned_lyrics ##" ### Cleaned Lyrics: ###"
prompt_template= "## original_lyrics ##"
completion_template_ids= tokenizer.encode(completion_template, add_special_tokens=False)
prompt_template_ids= tokenizer.encode(prompt_template, add_special_tokens=False)
collator= DataCollatorForCompletionOnlyLM(instruction_template= prompt_template_ids, response_template= completion_template_ids, tokenizer= tokenizer)

def formatting_prompts_func(example):
  output_texts= []
  prompt_template= "## original_lyrics ##"
  completion_template= "## cleaned_lyrics ##" ### Cleaned Lyrics: ###"
  for i in range(len(example["input"])):
    line_result= {}
    text= f"{prompt_template} {example['input'][i]} {completion_template} {example['target'][i]}"
    line_result["prompt"]= text
    output_texts.append(line_result)
  return output_texts


new_dataset= formatting_prompts_func(new_dataset["train"])
valid_set= formatting_prompts_func(valid_set["validate"])

train_set= Dataset.from_list(new_dataset)
valid_set= Dataset.from_list(valid_set)
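
For completeness: model, quantization_param, peft_config, training_params, and callback are created elsewhere in my script. The snippet below is only an illustrative sketch of that setup; the hyperparameter values are placeholders, not my exact config.

# Illustrative setup for the objects referenced by SFTTrainer below.
# All values here are placeholders, not the exact hyperparameters used.
import torch
from peft import LoraConfig
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, EarlyStoppingCallback, TrainingArguments

quantization_param= BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model= AutoModelForCausalLM.from_pretrained(
    checkpoint, quantization_config= quantization_param, device_map={"": 0}, token= huggingface_token
)

peft_config= LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
)

training_params= TrainingArguments(
    output_dir="llama2_lyrics_cleaner",   # placeholder path
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=2e-4,
    evaluation_strategy="steps",
    save_strategy="steps",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

callback= EarlyStoppingCallback(early_stopping_patience=3)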


trainer= SFTTrainer(
    model= model,
    data_collator= collator,
    train_dataset= train_set,
    dataset_text_field= "prompt",
    tokenizer= tokenizer,
    args= training_params,
    max_seq_length= None,
    eval_dataset= valid_set,
    callbacks= [callback],
    peft_config= peft_config,
)

trainer.train()
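
The hand-off between the training and inference scripts (saving the adapter and producing model_path / tokenizer_path) is not shown above; it looks roughly like the sketch below. This is illustrative only, and the exact save/merge steps and paths may differ.

# Illustrative hand-off from training to inference (paths and steps are placeholders):
# save the LoRA adapter, merge it into the base model, and write the result to model_path.
from peft import AutoPeftModelForCausalLM

trainer.save_model("llama2_lyrics_cleaner/final_adapter")
merged_model= AutoPeftModelForCausalLM.from_pretrained(
    "llama2_lyrics_cleaner/final_adapter", device_map={"": 0}, token= huggingface_token
).merge_and_unload()
merged_model.save_pretrained(model_path)
tokenizer.save_pretrained(tokenizer_path)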

test_dataset= load_dataset(dataset_name, token= huggingface_token, data_files= {"test":"test.jsonl"}, split="test[:2%]")
test_set= test_dataset.remove_columns(["original_track_title", "original_track_artist", "kb_track_title"])

# Re-create the tokenizer/collator and add the completion prompt (no target) to the test set

tokenizer= AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True, token= huggingface_token)
tokenizer.add_bos_token= True
tokenizer.add_eos_token= True
tokenizer.pad_token = "<pad>"
tokenizer.padding_side= "left"
completion_template= "## cleaned_lyrics ##" ### Cleaned Lyrics: ###"
prompt_template= "## original_lyrics ##"
completion_template_ids= tokenizer.encode(completion_template, add_special_tokens=False)
prompt_template_ids= tokenizer.encode(prompt_template, add_special_tokens=False)
collator= DataCollatorForCompletionOnlyLM(instruction_template= prompt_template_ids, response_template= completion_template_ids, tokenizer= tokenizer)

def format_dataset(data_set):
  output_texts= []
  prompt_template= "## original_lyrics ##"
  completion_template= "## cleaned_lyrics ##"
  for i in range(len(data_set)):
    text= f"{prompt_template} {data_set['input'][i]} {completion_template}"
    result= {}
    result["prompt"]= text
    output_texts.append(result)
  return output_texts


test_set= format_dataset(test_set)
test_set= Dataset.from_list(test_set)
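
As a sanity check (illustrative, not part of the actual pipeline), the formatted test prompts can be verified to end with the completion template, i.e. the same prefix format used during training:

# Sanity check (illustrative): every test prompt should end with the completion template
for row in test_set:
    assert row["prompt"].rstrip().endswith("## cleaned_lyrics ##"), row["prompt"]
print(test_set[0]["prompt"])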


gen_config= GenerationConfig(
    temperature=0.2,
    top_p=0.6,
    use_cache=True,
    do_sample=True,
    return_full_text=False,
    #repetition_penalty=1.15,
    #max_length= None,
)

# Load local Model
model= AutoModelForCausalLM.from_pretrained(model_path, quantization_config= quantization_param,
    device_map={"": 0}, token= huggingface_token
)

model.generation_config= gen_config
model.config.use_cache= True


# Define a function to generate text using the loaded model
def generate_text(model, prompt):
    prompt_ids= tokenizer(prompt, return_tensors="pt")
    prompt_ids= prompt_ids.to('cuda')
    # Allow up to 1.25x the prompt length in new tokens (max_new_tokens must be an int)
    len_prompt= int(len(prompt_ids.input_ids[0]) * 1.25)
    generator= model.generate(**prompt_ids, max_new_tokens= len_prompt, generation_config=gen_config)
    generated_text= tokenizer.decode(generator[0], skip_special_tokens=True)
    return generated_text


# Iterate over the dataset and generate text
response_list= []
for example in test_set:
    new_dict= {}
    new_dict["prompt"]= example['prompt']
    new_dict["generated_text"]= generate_text(model, example['prompt'])
    response_list.append(new_dict)


# Save the generated texts and target completions to files
filename= f"test_generations/generated_texts_{time}_{param}.jsonl"
with open(filename, 'w') as file:
    for entry in response_list:
        json.dump(entry, file)
        file.write('\n')