Problems with Trainer.compute_metrics

Hello, I'm using transformers.Trainer to fine-tune CodeLlama-7b-Python. I want to use BLEU to evaluate the model during training, but when I override compute_metrics and decode the labels and input_ids, the decoded result is not the original text I encoded. Here is my encode function (a sketch of a compute_metrics that decodes the labels safely follows the code):

```python
import copy

def tokenize2(prompt, tokenizer, cutoff_len, padding=False):
    # tokenize a single string; labels start out as a copy of input_ids
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding="max_length" if padding else False,
        return_tensors=None,
    )
    return {
        "input_ids": result["input_ids"],
        "labels": copy.deepcopy(result["input_ids"]),
    }
```
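
One thing to be aware of with tokenize2: tokenizer() adds special tokens by default, so with Llama-family tokenizers the input_ids begin with a BOS token, and a plain decode() shows it in the text. A minimal check of this behavior (the model id and prompt are just illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Python-hf")

out = tokenize2("def add(a, b):", tokenizer, cutoff_len=32)
# Llama-family tokenizers prepend BOS by default, so the first id should
# be tokenizer.bos_token_id and a plain decode() includes "<s>"
print(out["input_ids"][0] == tokenizer.bos_token_id)   # expected: True
print(tokenizer.decode(out["input_ids"]))              # likely "<s> def add(a, b):"
print(tokenizer.decode(out["input_ids"], skip_special_tokens=True))
```

This also means source_len below counts the BOS token, which matters when the labels are masked by position.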
```python
from typing import Dict

def tokenize_prompt(data_point: Dict, tokenizer=tokenizer, raw_source_len=256, cutoff_len=512):
    src = data_point["text"]
    tgt = data_point["target"]

    # tokenize the source alone to find out how many tokens it occupies
    tokenized_result = tokenize2(src, tokenizer, raw_source_len, padding=False)
    source_len = len(tokenized_result["input_ids"])

    assert source_len <= raw_source_len
    assert len(tgt) > 0

    # decode the (possibly truncated) source, then append the target
    src = tokenizer.decode(tokenized_result["input_ids"], skip_special_tokens=True,
                           clean_up_tokenization_spaces=True)
    prompt_with_response = src + tgt + " " + tokenizer.eos_token

    tokenized_with_response = tokenize2(prompt_with_response, tokenizer, cutoff_len, padding=False)

    # mask the source part of the labels with -100 so the loss ignores it.
    # NOTE: -100 is not a valid token id, so these labels cannot be decoded
    # as-is; this also assumes the first source_len tokens of the re-encoded
    # prompt match the source tokens, which a subword tokenizer does not
    # always guarantee after a decode/encode round trip.
    tokenized_with_response["labels"] = [-100] * source_len + tokenized_with_response["labels"][source_len:]

    assert len(tokenized_with_response["input_ids"]) == len(tokenized_with_response["labels"])

    return {"input_ids": tokenized_with_response["input_ids"],
            "labels": tokenized_with_response["labels"]}
```
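
For the evaluation side, the usual pattern (e.g. in the Hugging Face translation examples) is to replace the -100 mask with a real token id before decoding; decoding -100 directly is exactly what produces garbled text. A minimal sketch, assuming tokenizer.pad_token_id is set (e.g. to eos_token_id) and using the sacrebleu metric from the evaluate library:

```python
import numpy as np
import evaluate

bleu = evaluate.load("sacrebleu")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    # a plain Trainer passes raw logits; reduce them to token ids here
    # (or earlier, via preprocess_logits_for_metrics, to save memory)
    if preds.ndim == 3:
        preds = np.argmax(preds, axis=-1)
    # -100 marks positions ignored by the loss; it is not a valid token id,
    # so replace it with pad_token_id before decoding
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = bleu.compute(predictions=decoded_preds,
                          references=[[label] for label in decoded_labels])
    return {"bleu": result["score"]}
```

Note that the argmax over logits gives teacher-forced next-token predictions rather than generated text; for a generation-quality BLEU, Seq2SeqTrainer with predict_with_generate=True is the more common route.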