ValueError: You should supply an encoding or a list of encodings to this method that includes input_ids, but you provided ['label']

I have been trying to train Deberta v3 on multiple nli datasets at the same time. And faced the error mentioned below after the training on the line result = trainer.evaluate(dataset_test_final)
My guess is the data format is incorrect but I don’t know how to fix it?

The code I ran -

import torch
from transformers import TrainingArguments, Trainer

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader
import os
from datasets import load_dataset, load_metric, Dataset, DatasetDict
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig, DataCollatorWithPadding, DataCollator
device = "cuda" 
print(f"Device: {device}")

label2id = {"entailment": 0, "neutral": 1, "contradiction": 2}
id2label = {0: "entailment", 1: "neutral", 2: "contradiction"}

model_name = "microsoft/deberta-v3-large" 
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, model_max_length=512)  # model_max_length=512
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3, label2id=label2id, id2label=id2label).to(device)  # num_labels=3
print(model.config)

from datasets import concatenate_datasets

#MNLI MNLI MNLI
dataset_train_mnli = load_dataset("glue","mnli", split="train")  # split='train'
dataset_train_mnli = dataset_train_mnli.remove_columns(['idx'])

dataset_val_mnli_m = load_dataset("glue","mnli", split="validation_matched")  # split='train'
dataset_val_mnli_m = dataset_val_mnli_m.remove_columns(['idx'])
dataset_val_mnli_mm = load_dataset("glue","mnli", split="validation_mismatched")  # split='train'
dataset_val_mnli_mm = dataset_val_mnli_mm.remove_columns(['idx'])

dataset_test_mnli_m = load_dataset("glue","mnli", split="test_matched")  # split='train'
dataset_test_mnli_m = dataset_test_mnli_m.remove_columns(['idx'])
dataset_test_mnli_mm = load_dataset("glue","mnli", split="test_mismatched")  # split='train'
dataset_test_mnli_mm = dataset_test_mnli_mm.remove_columns(['idx'])

#ANLI ANLI ANLI
dataset_train_anli = load_dataset('anli', split=["train_r1", "train_r2", "train_r3"])
dataset_train_anli = concatenate_datasets([dataset_train_anli[0], dataset_train_anli[1], dataset_train_anli[2]])
dataset_train_anli = dataset_train_anli.remove_columns(["uid", "reason"])
dataset_test_anli = load_dataset('anli', split=["test_r1", "test_r2", "test_r3"])
dataset_test_anli = concatenate_datasets([dataset_test_anli[0], dataset_test_anli[1], dataset_test_anli[2]])
dataset_test_anli = dataset_test_anli.remove_columns(["uid", "reason"])

#SNLI SNLI SNLI
dataset_train_snli = load_dataset("snli", split="train") 
dataset_test_snli = load_dataset("snli", split="test")  
dataset_val_snli = load_dataset("snli", split="validation")  
from datasets import concatenate_datasets

dataset_train_final = concatenate_datasets([dataset_train_mnli, dataset_train_snli, dataset_train_anli])  # dataset_train_effec
dataset_train_final= dataset_train_final.shuffle(seed=42)

dataset_test_final= concatenate_datasets([dataset_val_mnli_mm,dataset_val_mnli_m,dataset_test_mnli_mm,dataset_test_mnli_m,dataset_val_snli,dataset_test_snli,dataset_test_anli,dataset_train_anli])
dataset_test_final= dataset_test_final.shuffle(seed=42) 


dynamic_padding = True

def tokenize_func(examples):
	return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)  # max_length=512,  padding=True

encoded_dataset_train = dataset_train_final.map(tokenize_func, batched=True)
encoded_dataset_test = dataset_test_final.map(tokenize_func, batched=True)
data_collator = DataCollator(tokenizer)
if  dynamic_padding == True:
	data_collator = DataCollatorWithPadding(tokenizer)

n_tokens = [len(encoding) for encoding in encoded_dataset_train["input_ids"]]

from datasets import list_metrics
metric = load_metric('accuracy')

train_args = TrainingArguments(
    output_dir=f'./results/output',
    logging_dir=f'./logs/output',
    learning_rate=6e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    warmup_ratio=0.06,  
    weight_decay=0.1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    fp16=True,
    fp16_full_eval=True,
    evaluation_strategy="epoch",
    seed=42,
    save_strategy = "epoch",
    save_total_limit=5,
    logging_strategy="epoch",
    report_to="all")

def compute_metrics(eval_pred):
	predictions, labels = eval_pred
	predictions = np.argmax(predictions, axis=1)
	return metric.compute(predictions=predictions, references=labels)

trainer = Trainer( 
    model=model,
    tokenizer=tokenizer,
    args=train_args,
    train_dataset=encoded_dataset_train,  
    eval_dataset=encoded_dataset_test,
    compute_metrics=compute_metrics
)


trainer.train()

result = trainer.evaluate(dataset_test_final)  
print(result)

The error-

Traceback (most recent call last):
  File "trainersv3.py", line 115, in <module>
    result = trainer.evaluate(dataset_test_final)
  File "/opt/conda/lib/python3.7/site-packages/transformers/trainer.py", line 2128, in evaluate                               
    metric_key_prefix=metric_key_prefix,
  File "/opt/conda/lib/python3.7/site-packages/transformers/trainer.py", line 2283, in evaluation_loop                        
    for step, inputs in enumerate(dataloader):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 521, in __next__                         
    data = self._next_data()
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 561, in _next_data                       
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch                           
    return self.collate_fn(data)
  File "/opt/conda/lib/python3.7/site-packages/transformers/data/data_collator.py", line 231, in __call__                     
    return_tensors=self.return_tensors,
  File "/opt/conda/lib/python3.7/site-packages/transformers/tokenization_utils_base.py", line 2718, in pad                    
    "You should supply an encoding or a list of encodings to this method "
ValueError: You should supply an encoding or a list of encodings to this method that includes input_ids, but you provided ['label']

Apologies for the long post. Thank you for reading and trying to help.