Model does not save correctly and does not run inference correctly when reloaded

Hi, I am trying to save a model with:

model_path = "./deberta-v3-large-5"
trainer.save_model(model_path)

and load it with:

reloaded_model = AutoModelForSequenceClassification.from_pretrained(
    model_path, num_labels=2, id2label=id2label, label2id=label2id)
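
Since the model was trained with a LoRA adapter (full code below), I suspect the checkpoint may actually need to be loaded through peft rather than straight into AutoModelForSequenceClassification; my understanding, which may well be wrong, is that it works roughly like this:

from peft import PeftModel

base_model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=2, id2label=id2label, label2id=label2id)
reloaded_model = PeftModel.from_pretrained(base_model, model_path)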

This is my full code:

from datasets import load_dataset, DatasetDict, Dataset

from transformers import (
    AutoTokenizer,
    AutoConfig, 
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np

# Initial load of the model
model_checkpoint = 'microsoft/deberta-v3-large'
# model_checkpoint = 'roberta-base'  # alternative checkpoint; deberta-v3-large is bigger, so training takes longer

# Define label maps specific to your task
id2label = {0: "Human", 1: "AI"}
label2id = {"Human": 0, "AI": 1}

# Generate classification model from model_checkpoint with the defined labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, num_labels=2, id2label=id2label, label2id=label2id)

# create tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=False)

# add pad token if none exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

def tokenize_function(examples):
    # Extract text entries from the 'texts' field
    texts = examples["texts"]

    # Tokenize and optionally truncate the text; handling batch of texts
    tokenized_inputs = tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",  # Ensure all sequences are padded to the same length for batch processing
        return_tensors="pt"  # Return PyTorch tensors
    )

    return tokenized_inputs
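
# NOTE: the dataset preparation step is roughly the following ("dataset" here
# stands for my DatasetDict with "train" and "validation" splits, loaded from
# my JSON files):
tokenized_datasets = dataset.map(tokenize_function, batched=True)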

from transformers import EarlyStoppingCallback
# Adjust these parameters to fit your needs
early_stopping_patience = 2  # Number of evaluations with no improvement after which training will be stopped

peft_config = LoraConfig(task_type="SEQ_CLS",
                         r=1,
                         lora_alpha=16,
                         lora_dropout=0.2)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# hyperparameters
lr = 1e-4
batch_size = 16
num_epochs = 1
# define training arguments
training_args = TrainingArguments(
    output_dir= model_checkpoint + "-lora-text-classification",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.02,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",  # This ensures that logs are generated at the end of each epoch
    load_best_model_at_end=True,
    logging_steps=50,
)
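
# NOTE: data_collator and compute_metrics are referenced below; my definitions
# are roughly the following (accuracy metric via the evaluate library):
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)
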
# create trainer object
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator, # this will dynamically pad examples in each batch to be equal length
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience)]
)


# train model
trainer.train()

And to test the reloaded model, I am using:

import json
import torch
import numpy as np
from sklearn.metrics import confusion_matrix

# Assuming the tokenizer and model have already been loaded

# Path to the JSON file
json_file_path = "/home/mario/bittensor/miguel_32/bittensor32/custom/model_training/new_test_data/query_results__gpu0__01bc13c3aaa94fca94f433c548102e92.json"

# Lists to store all texts and true labels
all_texts = []
all_true_labels = []

# Load data from the JSON file
with open(json_file_path, 'r') as file:
    data = json.load(file)
    all_texts.extend(data['texts'])  # Adjust field names based on your JSON structure
    all_true_labels.extend(data['labels'])  # Adjust field names based on your JSON structure

# Lists to store predictions
all_predicted_labels = []

print("Model predictions:")
print("-------------------")
# Make prediction for each text
for text in all_texts[:100]:
    print("prediction")
    # Tokenize the text
    inputs = tokenizer(
        text,
        truncation=True,
        max_length=512,
        padding="max_length",  # Ensure all sequences are padded to the same length for batch processing
        return_tensors="pt"  # Return PyTorch tensors
    )
    # Move tensors to the same device as model
    inputs = {k: v.to(reloaded_model.device) for k, v in inputs.items()}

    # Make prediction
    with torch.no_grad():
        outputs = reloaded_model(**inputs)

    # Handle different output types
    if isinstance(outputs, tuple):
        logits = outputs[0]
    else:
        logits = outputs.logits

    # Convert logits to probabilities
    probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # Get the most likely class
    predicted_class_id = probabilities.argmax().item()

    # Collect the predicted label
    all_predicted_labels.append(predicted_class_id)

# Calculate the confusion matrix
conf_matrix = confusion_matrix(all_true_labels[:100], all_predicted_labels)
print("Confusion Matrix:")
print(conf_matrix)

My problem is that the confusion matrix changes completely after saving and reloading, and I keep getting this warning when I load the model:

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
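
From reading about peft, I suspect that trainer.save_model on a PEFT-wrapped model may only write the adapter weights, which would explain why the reloaded base model gets a freshly initialized classifier head. Do I need to merge the adapter into the base model before saving, something like this (untested sketch; merge_and_unload is my guess at the right call)?

# Untested idea: merge the LoRA weights into the base model, then save the
# merged model so it can be reloaded with AutoModelForSequenceClassification
merged_model = trainer.model.merge_and_unload()
merged_model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)
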
Thank you very much