Implementing a sliding window for BERT NER fine-tuning

I want to implement a sliding-window approach while fine-tuning BERT for NER, using the tokenizer's stride and return_overflowing_tokens arguments, but I'm not sure how to wire them up.
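
For context, here is a toy example of what I understand the two arguments to do (made-up input, not my real data; only the checkpoint name matches my code):

# Toy illustration only: with truncation=True and return_overflowing_tokens=True,
# one long pre-split input comes back as several overlapping rows, and `stride`
# is the number of tokens consecutive rows share (not the step size).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
words = ["disease"] * 30  # pretend this is one long document, already split into words
enc = tok(
    words,
    is_split_into_words=True,
    truncation=True,
    max_length=10,
    stride=4,
    return_overflowing_tokens=True,
)
print(len(enc["input_ids"]))              # several windows, each at most 10 tokens
print(enc["overflow_to_sample_mapping"])  # all 0s: every window maps back to example 0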

I have some initial code, but I keep getting this error, and the reported lengths change on every run:

ValueError: expected sequence of length 4079 at dim 1 (got 5846)

The code:

from transformers import AutoTokenizer, AutoModelForTokenClassification, Trainer

custom_labels = ['O', 'B-Disease_disorder', 'I-Disease_disorder']
label_encoding_dict = {'O': 0, 'B-Disease_disorder': 1, 'I-Disease_disorder': 2}

tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
model = AutoModelForTokenClassification.from_pretrained(
    "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
    num_labels=len(custom_labels),  # the default head has 2 labels; this task needs 3
)

def tokenize_and_align_labels(examples):
    label_all_tokens = True
    stride = 200
    window_size = 512

    # With truncation=False, max_length has no effect here: each example comes
    # back as one long, un-windowed sequence of input_ids.
    tokenized_inputs = tokenizer(
        list(examples["token"]),
        truncation=False,
        is_split_into_words=True,
        padding=False,
        max_length=512,
    )

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        label_ids = []
        # NOTE: word_ids is token-level but label is word-level, so the two
        # slices below index different spaces.
        for start in range(0, len(word_ids), stride):
            end = start + window_size
            window_word_ids = word_ids[start:end]
            window_label = label[start:end]

            previous_word_idx = None
            for j, word_idx in enumerate(window_word_ids):
                if j >= len(window_label):
                    break

                if word_idx is None:
                    label_ids.append(-100)
                elif window_label[j] == 'O':
                    label_ids.append(0)  # 'O' maps to 0
                elif word_idx != previous_word_idx:
                    label_ids.append(label_encoding_dict[window_label[j]])
                else:
                    label_ids.append(label_encoding_dict[window_label[j]] if label_all_tokens else -100)
                previous_word_idx = word_idx

        # All windows are concatenated into a single label list per example,
        # so len(label_ids) can exceed len(input_ids[i]).
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs
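
From the docs, I think the intended pattern is to let the tokenizer do the windowing itself: pass truncation=True together with stride and return_overflowing_tokens=True, get one row back per window, and use overflow_to_sample_mapping to look up which original example each window came from. This is my untested sketch of that (the function name is mine):

def tokenize_and_align_labels_windowed(examples):
    # The fast tokenizer splits each over-long example into several overlapping
    # 512-token rows; consecutive rows share 200 tokens.
    tokenized_inputs = tokenizer(
        list(examples["token"]),
        is_split_into_words=True,
        truncation=True,
        max_length=512,
        stride=200,
        return_overflowing_tokens=True,
    )

    labels = []
    sample_mapping = tokenized_inputs["overflow_to_sample_mapping"]
    for i in range(len(tokenized_inputs["input_ids"])):
        # Window i may come from any original example; look up its word-level labels.
        word_level_labels = examples["ner_tags"][sample_mapping[i]]
        word_ids = tokenized_inputs.word_ids(batch_index=i)

        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)  # ignore special tokens in the loss
            elif word_idx != previous_word_idx:
                label_ids.append(label_encoding_dict[word_level_labels[word_idx]])
            else:
                # label_all_tokens=True behavior: label every sub-token of a word
                label_ids.append(label_encoding_dict[word_level_labels[word_idx]])
            previous_word_idx = word_idx
        # One label list per window, same length as that window's input_ids.
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    # The mapping column is not a model input, so drop it before training.
    tokenized_inputs.pop("overflow_to_sample_mapping")
    return tokenized_inputs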


trainer = Trainer(
    model,
    args,
    train_dataset=train_tokenized_datasets,
    eval_dataset=test_tokenized_datasets,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()
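
In case it is relevant, this is roughly how I build the tokenized datasets (raw_datasets is a placeholder for my actual DatasetDict with "token" and "ner_tags" columns). As I understand it, with return_overflowing_tokens the mapped dataset can have more rows than the input, so the original columns must be removed:

# Hypothetical mapping step; `raw_datasets` stands in for my real DatasetDict.
# remove_columns is required because the windowed output has more rows than
# the input, and the old columns cannot be carried over at a different length.
train_tokenized_datasets = raw_datasets["train"].map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=raw_datasets["train"].column_names,
)
test_tokenized_datasets = raw_datasets["test"].map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=raw_datasets["test"].column_names,
)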