ERROR: vars() argument must have __dict__ attribute when trying to use trainer.train()?

I have the following model that I am trying to fine-tune (CLIP_ViT + classification head). Here’s my model definition:

class CLIPNN(nn.Module):
    """CLIP vision model bundled with a small feed-forward classification head.

    The pre-trained vision transformer and its processor are loaded and stored
    on the module, but ``forward`` only runs the classification head: it
    expects embeddings that were computed beforehand.
    """

    def __init__(self, num_labels, pretrained_name="openai/clip-vit-base-patch32", dropout=0.1):
        """Load the pre-trained components and build the classification head.

        Args:
            num_labels: Number of output classes of the head.
            pretrained_name: Checkpoint name passed to ``from_pretrained``.
            dropout: Dropout probability used inside the head.
        """
        super().__init__()
        self.num_labels = num_labels
        # load pre-trained transformer & processor
        self.transformer = CLIPVisionModel.from_pretrained(pretrained_name)
        self.processor = CLIPProcessor.from_pretrained(pretrained_name)
        # initialize other layers (head after the transformer body)
        self.classifier = nn.Sequential(
            nn.Linear(512, 128, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout, inplace=False),
            nn.Linear(128, self.num_labels, bias=True),
        )

    # NOTE: ``forward`` must be defined at class level. When it is nested
    # inside ``__init__`` it is only a local function, and the module falls
    # back to ``nn.Module``'s unimplemented ``forward``.
    def forward(self, inputs, labels=None, **kwargs):
        """Classify pre-computed embeddings and optionally compute the loss.

        Args:
            inputs: Tensor fed to the head; its last dimension must be 512 to
                match the first linear layer.
            labels: Optional class-index tensor. When given, a cross-entropy
                loss is computed over the flattened logits and labels.
            **kwargs: Extra keyword arguments are accepted and ignored.

        Returns:
            A ``SequenceClassifierOutput`` holding ``loss`` (``None`` when no
            labels are passed) and ``logits``.
        """
        logits = self.classifier(inputs)
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Flatten so that labels shaped (batch, 1) line up with the logits.
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )

I also have the following definition for a dataset:

import torch


class CLIPDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing pre-computed embeddings with class labels.

    ``Dataset``, ``Tensor`` and ``LongTensor`` live in the top-level ``torch``
    package, not in ``torch.nn``, so they are referenced through ``torch``.
    """

    def __init__(self, embeddings, labels):
        """Store the embeddings and labels; both are indexed by sample position."""
        self.embeddings = embeddings
        self.labels = labels

    def __getitem__(self, idx):
        """Return one sample as a dict with ``"embeddings"`` and ``"labels"`` tensors.

        The dict keys become the keyword arguments of the model call, so they
        have to match the parameter names of the model's ``forward``.
        """
        item = {"embeddings": torch.Tensor(self.embeddings[idx])}
        # The label is wrapped in a list, giving a tensor of shape (1,).
        item['labels'] = torch.LongTensor([self.labels[idx]])
        return item

    def __len__(self):
        """Return the number of samples, taken from the number of labels."""
        return len(self.labels)

Note: here I am assuming that the model is fed pre-computed embeddings and does not compute them itself. I know this is not the right logic if I want to fine-tune the CLIP base model; I am just trying to get my code to work.

Something like this throws an error:

# Two-class classifier on top of CLIP.
model = CLIPNN(num_labels=2)

# Wrap each split in a CLIPDataset. Note that train_data / test_data are
# rebound here: before these lines they hold the raw data, afterwards they
# hold the dataset objects.
train_data = CLIPDataset(train_data, y_train)
test_data = CLIPDataset(test_data, y_test)

# Hand the model and both datasets to the Trainer, then start training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
)
trainer.train()

TypeError Traceback (most recent call last)
in
----> 1 trainer.train()

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1256 self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
1257
→ 1258 for step, inputs in enumerate(epoch_iterator):
1259
1260 # Skip past any already trained steps if resuming training

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
515 if self._sampler_iter is None:
516 self._reset()
→ 517 data = self._next_data()
518 self._num_yielded += 1
519 if self._dataset_kind == _DatasetKind.Iterable and \

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
555 def _next_data(self):
556 index = self._next_index() # may raise StopIteration
→ 557 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
558 if self._pin_memory:
559 data = _utils.pin_memory.pin_memory(data)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
45 else:
46 data = self.dataset[possibly_batched_index]
—> 47 return self.collate_fn(data)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in default_data_collator(features, return_tensors)
64
65 if return_tensors == “pt”:
—> 66 return torch_default_data_collator(features)
67 elif return_tensors == “tf”:
68 return tf_default_data_collator(features)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in torch_default_data_collator(features)
80
81 if not isinstance(features[0], (dict, BatchEncoding)):
—> 82 features = [vars(f) for f in features]
83 first = features[0]
84 batch = {}

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in <listcomp>(.0)
80
81 if not isinstance(features[0], (dict, BatchEncoding)):
—> 82 features = [vars(f) for f in features]
83 first = features[0]
84 batch = {}

TypeError: vars() argument must have __dict__ attribute

Any clue where I’m going wrong?

It looks like your train_data variable is used for two different things. Are you sure you passed the CLIPDataset instance to the Trainer? Judging from the error message, the elements of the Trainer's training dataset are not dictionaries.

I just tried again, and I’m getting the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-d197fa510b6c> in <module>
     20 )
     21 
---> 22 trainer.train()

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1282                         tr_loss += self.training_step(model, inputs)
   1283                 else:
-> 1284                     tr_loss += self.training_step(model, inputs)
   1285                 self.current_flos += float(self.floating_point_ops(inputs))
   1286 

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py in training_step(self, model, inputs)
   1787                 loss = self.compute_loss(model, inputs)
   1788         else:
-> 1789             loss = self.compute_loss(model, inputs)
   1790 
   1791         if self.args.n_gpu > 1:

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
   1819         else:
   1820             labels = None
-> 1821         outputs = model(**inputs)
   1822         # Save past state if it exists
   1823         # TODO: this needs to be fixed and made cleaner later.

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

TypeError: forward() missing 1 required positional argument: 'inputs'

Am I extending the model properly to work with the HF Trainer, or am I doing something wrong here?

The features in your dataset must have names that match the signature of the model's forward method, so either rename inputs to embeddings in your model or rename embeddings to inputs in your dataset.

I’m not sure I’m understanding this correctly, but if I change the dataset’s feature name to inputs (instead of embeddings), I’m still getting the same error.

class CLIPDataset(nn.utils.data.Dataset):
    def __init__(self, inputs, labels):
        self.inputs = inputs
        self.labels = labels

    def __getitem__(self, idx):
        item = {"inputs": nn.Tensor(self.embeddings[idx])}
        item['labels'] = nn.LongTensor([self.labels[idx]])
        return item