# Running bert-base-uncased on the SST-2 dataset, but all predictions are 1!
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import datasets
import torch
from sklearn.metrics import accuracy_score
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
from datasets import load_dataset
import evaluate
# Evaluate a BERT sentiment classifier on SST-2 and report its accuracy.

# Load a checkpoint whose classification head was actually fine-tuned on SST-2.
# Plain "bert-base-uncased" only ships the pretrained encoder, so
# AutoModelForSequenceClassification attaches a randomly initialised head to it
# and the pipeline predicts (almost) the same class for every sentence.
model_name = "textattack/bert-base-uncased-SST-2"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Load the tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create the classification pipeline.
generator = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)

# Load the labelled evaluation data. The SST-2 "test" split has hidden labels
# (every label is -1), so accuracy can only be measured on "validation".
dataset = load_dataset("sst2", split="validation")

# Pass the dataset to the pipeline; KeyDataset feeds it the "sentence" column.
results = generator(KeyDataset(dataset, "sentence"))

# Extract the true labels from the dataset as a plain list.
true_labels = list(dataset["label"])
print("True labels", true_labels)
print("TrueLabelSize=", len(true_labels))

# Convert the predicted label names to class ids using the model's own
# mapping instead of hard-coding "LABEL_0", so checkpoints with differently
# named labels are handled correctly too.
label2id = model.config.label2id
pred_labels = [label2id[result["label"]] for result in results]
print("PredLabels", pred_labels)
print("predLabelSize=", len(pred_labels))

# Load the accuracy metric.
accuracy_metric = evaluate.load("accuracy")

# Compute accuracy of the predictions against the reference labels.
accuracy = accuracy_metric.compute(predictions=pred_labels, references=true_labels)
print("Accuracy:", accuracy)