Inconsistency in Model Output [Token Classification]

from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
from transformers import pipeline
# Load the tokenizer and the token-classification model from the same
# checkpoint, then wrap both in a "ner" pipeline.
checkpoint = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)
nlp = pipeline("ner", tokenizer=tokenizer, model=model)

# Run the pipeline on a sample sentence and print the entities it reports.
example = "My name is Wolfgang and I live in Berlin"
ner_results = nlp(example)
print(ner_results)

Output 1:

[{'entity': 'B-PER', 'score': 0.9990139, 'index': 4, 'word': 'Wolfgang', 'start': 11, 'end': 19}, 
{'entity': 'B-LOC', 'score': 0.999645, 'index': 9, 'word': 'Berlin', 'start': 34, 'end': 40}]
# Encode the sentence as a single-item batch of PyTorch tensors.
# No padding is requested: with padding="max_length" and max_length=512 the
# model would emit logits for 512 positions, so predicted_labels would mostly
# hold predictions for padding tokens instead of the tokens of `example`.
inputs = tokenizer(example, return_tensors="pt", add_special_tokens=True, max_length=512, truncation=True)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
# Feed the encoded segment into the model to obtain the predicted labels for each token.
# This is inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(input_ids, attention_mask=attention_mask)
logits = outputs.logits
# argmax over the last (label) dimension; [0] picks the only sequence in the batch.
# Positions of the special tokens added by the tokenizer are still included.
predicted_labels = torch.argmax(logits, dim=2)[0]

[0, 0, 0, 0, 3, 0, 0, 0, 0, 0]

# Take the id -> label-name table from the model's own config rather than a
# hand-written list, so the names cannot drift from what the checkpoint
# (and the pipeline built on it) actually uses.
label_list = [model.config.id2label[i] for i in range(model.config.num_labels)]
# .tolist() converts the tensor of label ids to plain Python ints for indexing.
final_label_names = [label_list[label] for label in predicted_labels.tolist()]

Output 2:

['O','O','O','O', 'B-PER', 'O','O','O','O','O']