Hugging Face question answering with BERT: SQuAD validation fails with "IndexError: list index out of range"

I am new to this field. I followed the Hugging Face question answering tutorial on the SQuAD dataset with BERT and fine-tuned the model. But when I try to validate it, I run into a problem.
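
For context, I built the validation features roughly the way the tutorial does. This is paraphrased from memory, so my actual preprocessing may differ slightly; the checkpoint name, max_length, and stride below are roughly the tutorial's values, not necessarily exactly what I used:

from datasets import load_dataset
from transformers import AutoTokenizer

# Assumed setup (roughly the tutorial's values; mine may differ)
raw_datasets = load_dataset("squad")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
max_length = 384
stride = 128

def preprocess_validation_examples(examples):
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        stride=stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    sample_map = inputs.pop("overflow_to_sample_mapping")
    example_ids = []

    for i in range(len(inputs["input_ids"])):
        # Remember which original example each feature comes from
        sample_idx = sample_map[i]
        example_ids.append(examples["id"][sample_idx])

        # Set the offsets of tokens outside the context to None so the
        # metric function can skip them
        sequence_ids = inputs.sequence_ids(i)
        offset = inputs["offset_mapping"][i]
        inputs["offset_mapping"][i] = [
            o if sequence_ids[k] == 1 else None for k, o in enumerate(offset)
        ]

    inputs["example_id"] = example_ids
    return inputs

validation_dataset = raw_datasets["validation"].map(
    preprocess_validation_examples,
    batched=True,
    remove_columns=raw_datasets["validation"].column_names,
)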

I used this function to compute the metrics:

import collections

import numpy as np
import torch
from datasets import load_metric
from tqdm.auto import tqdm
from transformers import Trainer, TrainingArguments

metric = load_metric("squad")

n_best = 20
max_answer_length = 30


def compute_metrics(start_logits, end_logits, features, examples):
    example_to_features = collections.defaultdict(list)
    for idx, feature in enumerate(features):
        example_to_features[feature["example_id"]].append(idx)

    predicted_answers = []
    for example in tqdm(examples):
        example_id = example["id"]
        context = example["context"]
        answers = []

        # Loop through all features associated with that example
        for feature_index in example_to_features[example_id]:
            start_logit = start_logits[feature_index]
            end_logit = end_logits[feature_index]
            offsets = features[feature_index]["offset_mapping"]

            start_indexes = np.argsort(start_logit)[-1 : -n_best - 1 : -1].tolist()
            end_indexes = np.argsort(end_logit)[-1 : -n_best - 1 : -1].tolist()
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # Skip answers that are not fully in the context
                    if offsets[start_index] is None or offsets[end_index] is None:
                        continue
                    # Skip answers with a length that is either < 0 or > max_answer_length
                    if (
                        end_index < start_index
                        or end_index - start_index + 1 > max_answer_length
                    ):
                        continue

                    answer = {
                        "text": context[offsets[start_index][0] : offsets[end_index][1]],
                        "logit_score": start_logit[start_index] + end_logit[end_index],
                    }
                    answers.append(answer)

        # Select the answer with the best score
        if len(answers) > 0:
            best_answer = max(answers, key=lambda x: x["logit_score"])
            predicted_answers.append(
                {"id": example_id, "prediction_text": best_answer["text"]}
            )
        else:
            predicted_answers.append({"id": example_id, "prediction_text": ""})
            print(len(answers))

    theoretical_answers = [{"id": ex["id"], "answers": ex["answers"]} for ex in examples]
    return metric.compute(predictions=predicted_answers, references=theoretical_answers)


compute_metrics(start_logits, end_logits, validation_dataset, raw_datasets["validation"])
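
Here start_logits and end_logits come from running the fine-tuned model over validation_dataset, roughly like this (trainer is the Trainer I used for fine-tuning, so this is a sketch of my setup rather than the exact code):

# Sketch: `trainer` is the Trainer instance from fine-tuning, assumed from earlier cells
predictions, _, _ = trainer.predict(validation_dataset)
start_logits, end_logits = predictions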

But when I call this function, I get this error:

"---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/tmp/ipykernel_17/2181703361.py in
----> 1 compute_metrics(start_logits, end_logits, validation_dataset, raw_datasets[“validation”])

/tmp/ipykernel_17/2679039606.py in compute_metrics(start_logits, end_logits, features, examples)
40
41 answer = {
—> 42 “text”: context[offsets[start_index][0] : offsets[end_index][1]],
43 “logit_score”: start_logit[start_index] + end_logit[end_index],
44 }

IndexError: list index out of range"

What could be causing this?