Hello guys
I would like to evaluate a model from the HF repo (‘mrm8488/bert-italian-finedtuned-squadv1-it-alfa’) on a SQuAD-format file I compiled, just to get a rough estimate of the metrics.
This is the code I wrote:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, Trainer, TrainingArguments
import torch
from transformers import default_data_collator
import json
# Model from HuggingFace
model_checkpoint = 'mrm8488/bert-italian-finedtuned-squadv1-it-alfa'
# Import tokenizer
my_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# Import model
my_model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
# Dataset for evaluation
eval_data_path = '/content/drive/MyDrive/BERT/SQuAD_files/result.json'
# The file holds non-ASCII Italian text, so decode it explicitly as UTF-8
# instead of relying on the platform default encoding.
with open(eval_data_path, encoding='utf-8') as json_file:
    data = json.load(json_file)
# NOTE(review): `data` is the raw parsed JSON (a nested dict), not a
# tokenized, integer-indexable dataset. Passing it to `trainer.evaluate`
# makes the DataLoader look up `data[0]`, which is presumably the source of
# the `KeyError: 0` -- confirm, then tokenize it into features first.
data_collator = default_data_collator
trainer = Trainer(
Unfortunately, this does not work and I can’t understand why.
This is the error:
KeyError Traceback (most recent call last)
<ipython-input-31-54109037e744> in <module>()
----> 1 trainer.evaluate(data)
5 frames
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
2006 prediction_loss_only=True if self.compute_metrics is None else None,
2007 ignore_keys=ignore_keys,
-> 2008 metric_key_prefix=metric_key_prefix,
2009 )
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
2145 observed_num_examples = 0
2146 # Main evaluation loop
-> 2147 for step, inputs in enumerate(dataloader):
2148 # Update the observed num examples
2149 observed_batch_size = find_batch_size(inputs)
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in __next__(self)
515 if self._sampler_iter is None:
516 self._reset()
--> 517 data = self._next_data()
518 self._num_yielded += 1
519 if self._dataset_kind == _DatasetKind.Iterable and \
/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
555 def _next_data(self):
556 index = self._next_index() # may raise StopIteration
--> 557 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
558 if self._pin_memory:
559 data = _utils.pin_memory.pin_memory(data)
/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
KeyError: 0
A glimpse of my SQuAD formatted file:
"data": [
"paragraphs": [
"qas": [
"question": "Qual è l’età?",
"id": 78079,
"answers": [
"answer_id": 89658,
"document_id": 84480,
"question_id": 78079,
"text": "02/01/1966",
"answer_start": 113,
"answer_category": "SHORT"
"is_impossible": false
"question": "Qual è il titolo di studio?",
"id": 78082,
"answers": [
"answer_id": 89661,
"document_id": 84480,
"question_id": 78082,
"text": "media superiore",
"answer_start": 1157,
"answer_category": "SHORT"
"is_impossible": false
"context" = "..."
"document_id" = "..."
What am I doing wrong?
PS: Is there a tool in the HF libraries for annotating files for question answering in a smart way?
Thanks a lot