How can I get the score from the question-answering pipeline? Is there a bug when the question-answering pipeline is used?

When I run the following code:

from transformers import AutoTokenizer, AutoModelForQuestionAnswering


import torch

# Load the tokenizer and the question-answering model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# Context passage from which the answer span is extracted.
text = r""" As checked Dis is not yet on boarded to ARB portal, hence we cannot upload the invoices in portal """

questions = [
    "Dis asked if it is possible to post the two invoice in ARB.I have not access so I wanted to check if you would be able to do it.",
]

for question in questions:
    # Encode the (question, context) pair as a single model input.
    inputs = tokenizer.encode_plus(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs["input_ids"].tolist()[0]

    text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
    # NOTE(review): this unpacking assumes the model returns a (start, end)
    # tuple; newer transformers releases may return an output object instead
    # -- confirm against the installed version.
    answer_start_scores, answer_end_scores = model(**inputs)

    answer_start = torch.argmax(
        answer_start_scores
    )  # Get the most likely beginning of answer with the argmax of the score
    answer_end = torch.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score

    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))

    print(f"Question: {question}")
    print(f"Answer: {answer}\n")

The answer that I get here is:

Question: Dis asked if it is possible to post the two invoice in ARB.I have not access so I wanted to check if you would be able to do it.
Answer: dis is not yet on boarded to ARB portal

How do I get a score for this answer? The score I am looking for is similar to the one I get when I run the question-answering pipeline.

I have to take this approach because the question-answering pipeline gives me a KeyError for the code below:

from transformers import pipeline
# Build a question-answering pipeline; no model is named here, so presumably
# the library falls back to its default checkpoint.
nlp = pipeline("question-answering")

# Context passage in which the pipeline searches for the answer span.
context = r""" As checked Dis is not yet on boarded to ARB portal, hence we cannot upload the invoices in portal. """
# According to the post, this is the call that raises the KeyError.
print(nlp(question="Dis asked if it is possible to post the two invoice in ARB?", context=context))

See how `decode` is called and how it is defined in the question-answering pipeline source.
You can modify `decode` like this so that it also returns the scores:

def decode(start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int):
    """Select the ``topk`` best answer spans and return them with their scores.

    Every (start, end) token pair is scored as ``start[i] * end[j]``; pairs
    with ``end < start`` or spanning more than ``max_answer_len`` tokens are
    zeroed out before ranking.

    Args:
        start: Start scores, shape ``(batch, seq_len)`` or ``(seq_len,)``.
            The scores are multiplied together, so they are presumably
            probabilities rather than raw logits -- confirm with the caller.
        end: End scores, same shape as ``start``.
        topk: Number of best spans to return.
        max_answer_len: Maximum span length in tokens (end index inclusive).

    Returns:
        A tuple ``(starts, ends, scores)`` of equally sized arrays, ordered
        from best to worst span. At most ``topk`` entries are returned (fewer
        when there are fewer candidate cells than ``topk``).
    """
    # Ensure a leading batch axis so that 1-D inputs work as well.
    if start.ndim == 1:
        start = start[None]
    if end.ndim == 1:
        end = end[None]

    # Compute the score of each tuple(start, end) to be the real answer
    outer = np.matmul(np.expand_dims(start, -1), np.expand_dims(end, 1))

    # Remove candidate with end < start and end - start > max_answer_len
    candidates = np.tril(np.triu(outer), max_answer_len - 1)

    #  Inspired by Chen & al. (https://github.com/facebookresearch/DrQA)
    scores_flat = candidates.flatten()

    if topk == 1:
        idx_sort = np.array([np.argmax(scores_flat)])
    elif len(scores_flat) <= topk:
        # argpartition needs kth < len, so fall back to a full sort whenever
        # topk covers every candidate.
        idx_sort = np.argsort(-scores_flat)
    else:
        # Partial selection of the topk best, then order just those.
        idx = np.argpartition(-scores_flat, topk)[0:topk]
        idx_sort = idx[np.argsort(-scores_flat[idx])]

    # Drop the batch index; keep the (start, end) token positions.
    starts, ends = np.unravel_index(idx_sort, candidates.shape)[1:]
    # Read scores through the flat index so they always match the chosen cell.
    scores = scores_flat[idx_sort]
    return starts, ends, scores
1 Like