Error/warning: Not all data has been set. Are you sure you passed all values?

Hi all,
while using Trainer to train a BERT model, I receive the following error/warning:
“Not all data has been set. Are you sure you passed all values?”
I’m not able to fix it, and it seems to calculate the wrong metrics, presumably because of the missing data.
About my setup: I want to train a BERT model with a custom head for multilabel classification.
This is my code:

import pandas as pd
import numpy as np
import datasets
import json
import torch
from sklearn import metrics
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, f1_score
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from transformers import BertModel, BertTokenizer
from sklearn.model_selection import train_test_split
from datasets import Dataset
from torch import cuda
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

# Checkpoint name handed to `from_pretrained` for the tokenizer and the BERT encoder.
MODEL_NAME = 'dbmdz/bert-base-german-uncased'
# Random seed; not referenced by the code shown in this post.
SEED = 321

def compute_metrics_multilables_b(eval_pred):
    """Compute accuracy and F1 scores for multilabel predictions.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` are
            treated as raw logits (a sigmoid is applied before
            thresholding); ``labels`` are the target vectors.

    Returns:
        A dict with the keys ``accuracy``, ``f1_micro`` and ``f1_macro``.
        For multilabel input, sklearn's ``accuracy_score`` is the
        exact-match (subset) accuracy over whole label vectors.
    """
    predictions, labels = eval_pred

    # Sigmoid turns logits into per-label probabilities; 0.5 is the
    # decision threshold for both predictions and targets.
    probabilities = torch.sigmoid(torch.tensor(predictions)).cpu().detach().numpy()
    preds_full = probabilities >= 0.5
    labels = np.array(labels) >= 0.5

    return {
        'accuracy': metrics.accuracy_score(labels, preds_full),
        'f1_micro': metrics.f1_score(labels, preds_full, average='micro'),
        'f1_macro': metrics.f1_score(labels, preds_full, average='macro'),
    }

class EmotionDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with label vectors.

    The base class was lost in the original paste; ``torch.utils.data.Dataset``
    is assumed, since the class implements ``__getitem__`` and ``__len__``.
    """

    def __init__(self, encodings, labels):
        """Store the encodings and labels.

        Args:
            encodings: Mapping from feature name to a per-example sequence
                of values (anything exposing ``.items()``).
            labels: Sequence of per-example labels, aligned with ``encodings``.
        """
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, including ``'labels'``."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples (one per label entry)."""
        return len(self.labels)

class CustomTrainer(Trainer):
    """Trainer that scores the model's raw logits with a BCE-with-logits loss."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the multilabel loss for one batch.

        Args:
            model: Callable taking ``(input_ids, attention_mask, token_type_ids)``
                and returning a logits tensor.
            inputs: Batch dict with the keys ``input_ids``, ``attention_mask``,
                ``token_type_ids`` and ``labels``.
            return_outputs: When true, also return the outputs as a
                ``(loss, logits)`` tuple.
            num_items_in_batch: Unused; accepted so that Trainer versions
                which pass this keyword can still call the method.

        Returns:
            The loss tensor, or ``(loss, (loss, logits))`` when
            ``return_outputs`` is true.
        """
        # Read the labels without popping them, so the caller's batch dict
        # still holds them after this call.
        labels = inputs['labels']
        logits = model(inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids'])
        # BCEWithLogitsLoss needs targets with the same dtype as the logits.
        loss = torch.nn.BCEWithLogitsLoss()(logits, labels.type_as(logits))
        # Outputs go back as a tuple with the loss first; this is the shape the
        # thread's advice ("make sure your model outputs tuples") asks for.
        return (loss, (loss, logits)) if return_outputs else loss

class MultiLabelClassifier(torch.nn.Module):
    """BERT encoder followed by dropout and a linear head with 8 outputs."""

    def __init__(self):
        super().__init__()
        self.l1 = BertModel.from_pretrained(MODEL_NAME)
        self.l2 = torch.nn.Dropout(0.3)
        # Output is an 8-dim vector. The input width is read from the encoder
        # config instead of hard-coding 768 (the value for BERT-base).
        self.l3 = torch.nn.Linear(self.l1.config.hidden_size, 8)

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None,
                head_mask=None, inputs_embeds=None, labels=None, output_attentions=None,
                output_hidden_states=None, return_dict=None):
        """Return the raw logits tensor for a batch.

        Only ``input_ids``, ``attention_mask`` and ``token_type_ids`` are
        used; the remaining parameters are accepted and ignored.
        """
        output_1 = self.l1(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids).pooler_output
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

# Wrap the pandas frames (df_train / df_validation / df_test are defined
# outside this snippet) as `datasets.Dataset` objects.
dataset_train = Dataset.from_pandas(df_train)
dataset_validation = Dataset.from_pandas(df_validation)
dataset_test = Dataset.from_pandas(df_test)

# load tokenizer
# (The original also loaded a bare BertModel into `model` here; it was
# overwritten by MultiLabelClassifier() below before ever being used.)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# preprocess data
field_text = "Text"
field_label = "list"

# tokenize data
train_encodings = tokenizer(dataset_train[field_text], truncation=True, padding=True)
val_encodings = tokenizer(dataset_validation[field_text], truncation=True, padding=True)
test_encodings = tokenizer(dataset_test[field_text], truncation=True, padding=True)

train_dataset = EmotionDataset(train_encodings, dataset_train[field_label])
val_dataset = EmotionDataset(val_encodings, dataset_validation[field_label])
test_dataset = EmotionDataset(test_encodings, dataset_test[field_label])

model = MultiLabelClassifier()
# NOTE(review): the right-hand side of this statement was lost in the paste;
# presumably it moved the model to `device` -- confirm against the original.
_ = model.to(device)

training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=1,              # total # of training epochs
    per_device_train_batch_size=8,   # batch size per device during training
    per_device_eval_batch_size=20,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
)

trainer = CustomTrainer(
    model=model,                     # the instantiated model to be trained
    args=training_args,              # training arguments, defined above
    train_dataset=train_dataset,     # training dataset
    eval_dataset=test_dataset,       # evaluation dataset
    # The posted snippet never handed the metrics function to the trainer,
    # although the post reports metric values; wire it in explicitly.
    compute_metrics=compute_metrics_multilables_b,
)

_ = trainer.train()

The target/prediction is a binary 8-dim vector for each data record. The error/warning is thrown by trainer.evaluate().

Any idea what I did wrong?

Since the code is hard to read here, here is the link to the pastebin snippet:


I believe this is the same problem as in this topic. Make sure your model outputs tuples if you want to use it with Trainer. Also, your compute_loss should accept a return_outputs argument to work well with the latest version of transformers.

1 Like

Thanks for the quick reply.

I tried your suggestion from the other thread. But it still throws the same error. Is my return value of the loss function still wrong?

Here is my new loss function:

def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
    """Compute the multilabel BCE-with-logits loss for one batch.

    Args:
        model: Callable taking ``(input_ids, attention_mask, token_type_ids)``
            and returning a logits tensor.
        inputs: Batch dict with the keys ``input_ids``, ``attention_mask``,
            ``token_type_ids`` and ``labels``.
        return_outputs: When true, also return the outputs as a
            ``(loss, logits)`` tuple.
        num_items_in_batch: Unused; accepted so that Trainer versions which
            pass this keyword can still call the method.

    Returns:
        The loss tensor, or ``(loss, (loss, logits))`` when
        ``return_outputs`` is true.
    """
    # Index instead of pop: the batch dict keeps its "labels" entry, so a
    # caller that reads the labels after this call still finds them.
    # NOTE(review): popping them is a likely reason the warning persisted --
    # confirm against the installed Trainer.prediction_step.
    labels = inputs["labels"]
    logits = model(inputs['input_ids'], inputs['attention_mask'], inputs['token_type_ids'])
    # BCEWithLogitsLoss needs targets with the same dtype as the logits.
    loss = torch.nn.BCEWithLogitsLoss()(logits, labels.type_as(logits))
    return (loss, (loss, logits)) if return_outputs else loss

Solved the problem with this solution:

1 Like