Please save me: GPT-like model generation gone wrong

Generation begins with the eos_token and then keeps generating one of the tokens I added, repeating it all the way to the last token.

I intended to fine-tune the model with QLoRA but ran into this problem, and what's more, when I used only the quantized model (no fine-tuning at all), it funnily gave the same results!!
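For reference, I call generation roughly like this (a minimal sketch assuming the model and tokenizer are set up as in the NOTE below; the prompt text and max_new_tokens are placeholders, not my exact values):

inputs = tokenizer("###Prompt: some text ###Completion:", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=50)
# The decoded output starts with the eos_token and then repeats one of the added tokens
print(tokenizer.decode(output_ids[0]))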

NOTE: I modified the tokenizer and the model's embeddings as shown below before saving them, to be reused later for LoRA fine-tuning:

Tokenizer:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("core42/jais-13b", trust_remote_code=True)
tokenizer.padding_side = 'right'
print(tokenizer.eos_token)

# Register a dedicated padding token
tokenizer.add_special_tokens({'pad_token': '<|pad|>'})
print(tokenizer.pad_token)

# 301 placeholder tokens per group
hidden_tokens = [f'<hidden_{i}>' for i in range(0, 301)]
number_tokens = [f'<number_{i}>' for i in range(0, 301)]
special_tokens_1 = hidden_tokens + number_tokens
tokenizer.add_tokens(special_tokens_1)

# Markers that delimit the prompt and the completion
special_tokens_2 = ["###Prompt:", "###Completion:"]
tokenizer.add_tokens(special_tokens_2)
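A quick sanity check (a sketch I ran for completeness) suggests the tokenizer side is fine, the added tokens should come back as single IDs:

print(len(tokenizer))  # original vocab + 605 new tokens (1 pad + 602 placeholders + 2 markers)
print(tokenizer.convert_tokens_to_ids('<hidden_0>'))
print(tokenizer('###Prompt:')['input_ids'])  # should contain a single added-token ID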

Model:

import torch
from transformers import AutoModelForCausalLM

# device_map and bnb_config (a BitsAndBytesConfig) are defined earlier, not shown here
model = AutoModelForCausalLM.from_pretrained(
    "Model_de_base/models--core42--jais-13b/snapshots/cce6dfc87639f6146fc15333bd326db236497879",
    device_map=device_map,
    quantization_config=bnb_config,
    trust_remote_code=True,  # Jais ships custom modeling code
)

# Grow the embedding matrix to match the enlarged vocabulary
model.resize_token_embeddings(len(tokenizer))

# Base vector for initializing the new rows: mean over the (already resized) embedding matrix
embedding_layer = model.get_input_embeddings()
average_embedding = torch.mean(embedding_layer.weight.data, dim=0)

# Define noise scales
group_noise_scale = 0.05  # Differentiates between 'hidden' and 'number' groups
token_noise_scale = 0.01  # Differentiates between tokens within a group

# Add group-level noise to differentiate 'hidden' and 'number' tokens
hidden_group_noise = torch.randn_like(average_embedding) * group_noise_scale
number_group_noise = torch.randn_like(average_embedding) * group_noise_scale

# Initialize 'hidden' token embeddings
for token in hidden_tokens:
    token_id = tokenizer.convert_tokens_to_ids(token)
    token_noise = torch.randn_like(average_embedding) * token_noise_scale
    embedding_layer.weight.data[token_id] = average_embedding + hidden_group_noise + token_noise

# Initialize 'number' token embeddings
for token in number_tokens:
    token_id = tokenizer.convert_tokens_to_ids(token)
    token_noise = torch.randn_like(average_embedding) * token_noise_scale
    embedding_layer.weight.data[token_id] = average_embedding + number_group_noise + token_noise

# The prompt/completion markers get fully random embeddings
for token in special_tokens_2:
    token_id = tokenizer.convert_tokens_to_ids(token)
    embedding_layer.weight.data[token_id] = torch.randn_like(average_embedding)

model.config.vocab_size = len(tokenizer)
# Swap in a brand-new, randomly initialized lm_head sized for the new vocabulary
model.lm_head = torch.nn.Linear(model.config.hidden_size, len(tokenizer))
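After these modifications I save both artifacts, roughly like this (a sketch; the output directory is a placeholder name):

# Saving sketch; "jais-13b-extended" is a placeholder directory
save_dir = "jais-13b-extended"
tokenizer.save_pretrained(save_dir)
model.save_pretrained(save_dir)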

I’ll never forget your help.