I have the following model that I am trying to fine-tune (CLIP_ViT + classification head). Here’s my model definition:
class CLIPNN(nn.Module):
    """Classification head applied to pre-computed embedding vectors.

    The CLIP vision model and processor are loaded and stored on the module,
    but ``forward`` does not call them: it feeds the given embeddings straight
    into the classifier head.

    Args:
        num_labels: Number of output classes.
        pretrained_name: Checkpoint name passed to ``from_pretrained`` for
            both the vision model and the processor.
        dropout: Dropout probability used inside the classifier head.
        embedding_dim: Width of the embedding vectors fed to the head. The
            default (512) is the value that was previously hard-coded.
    """

    def __init__(
        self,
        num_labels,
        pretrained_name="openai/clip-vit-base-patch32",
        dropout=0.1,
        embedding_dim=512,
    ):
        super().__init__()
        self.num_labels = num_labels
        # load pre-trained transformer & processor
        self.transformer = CLIPVisionModel.from_pretrained(pretrained_name)
        self.processor = CLIPProcessor.from_pretrained(pretrained_name)
        # initialize other layers (head after the transformer body)
        self.classifier = nn.Sequential(
            nn.Linear(embedding_dim, 128, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout, inplace=False),
            nn.Linear(128, self.num_labels, bias=True),
        )

    def forward(self, inputs=None, labels=None, *, embeddings=None, **kwargs):
        """Classify a batch of embeddings and optionally compute the loss.

        Args:
            inputs: Batch of embedding vectors for the classifier head.
            labels: Optional class indices; when given, a cross-entropy loss
                is computed against the logits.
            embeddings: Keyword alias for ``inputs``. Hugging Face ``Trainer``
                calls the model with the batch dict unpacked as keyword
                arguments, so a dataset that yields an ``"embeddings"`` key
                arrives here under that name rather than as ``inputs``.
            **kwargs: Any other batch keys; ignored.

        Returns:
            A ``SequenceClassifierOutput`` holding ``loss`` (``None`` when no
            labels were supplied) and ``logits``.

        Raises:
            TypeError: If neither ``inputs`` nor ``embeddings`` is provided.
        """
        if inputs is None:
            inputs = embeddings
        if inputs is None:
            raise TypeError(
                "forward() missing required argument: 'inputs' (or 'embeddings')"
            )

        logits = self.classifier(inputs)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Flatten so logits are (N, num_labels) and labels are (N,).
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
        )
I also have the following definition for a dataset:
import torch  # Dataset / tensor constructors live on `torch`, not on `torch.nn`


class CLIPDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing pre-computed embeddings with class labels.

    Args:
        embeddings: Indexable collection of embedding vectors.
        labels: Indexable collection of integer class labels; its length
            defines the dataset length.
    """

    def __init__(self, embeddings, labels):
        self.embeddings = embeddings
        self.labels = labels

    def __getitem__(self, idx):
        """Return one example as a dict of tensors.

        The dict has an ``"embeddings"`` entry (float32 tensor) and a
        ``"labels"`` entry (int64 tensor holding the single label).
        """
        return {
            # Explicit dtype: float32 is what the bare Tensor constructor produced.
            "embeddings": torch.as_tensor(self.embeddings[idx], dtype=torch.float32),
            # Wrap the label in a list so the tensor keeps shape (1,).
            "labels": torch.tensor([self.labels[idx]], dtype=torch.long),
        }

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)
Note: here I am assuming that the model is fed pre-computed embeddings rather than computing them itself. I know this is not the right logic if I want to fine-tune the CLIP base model; I am just trying to get my code to work.
Something like this throws an error:
# Build the classifier with two output classes.
model = CLIPNN(num_labels=2)

# Wrap the raw embeddings and labels in dataset objects. Note that this
# rebinds `train_data` / `test_data` from the raw inputs to the datasets.
train_data = CLIPDataset(embeddings=train_data, labels=y_train)
test_data = CLIPDataset(embeddings=test_data, labels=y_test)

# Hand everything to the Trainer and start training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
)
trainer.train()
TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
----> 1 trainer.train()

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1256 self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
   1257
-> 1258 for step, inputs in enumerate(epoch_iterator):
   1259
   1260 # Skip past any already trained steps if resuming training

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
    515 if self._sampler_iter is None:
    516 self._reset()
--> 517 data = self._next_data()
    518 self._num_yielded += 1
    519 if self._dataset_kind == _DatasetKind.Iterable and \

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/dataloader.py in _next_data(self)
    555 def _next_data(self):
    556 index = self._next_index() # may raise StopIteration
--> 557 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
    558 if self._pin_memory:
    559 data = _utils.pin_memory.pin_memory(data)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
     45 else:
     46 data = self.dataset[possibly_batched_index]
---> 47 return self.collate_fn(data)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in default_data_collator(features, return_tensors)
     64
     65 if return_tensors == "pt":
---> 66 return torch_default_data_collator(features)
     67 elif return_tensors == "tf":
     68 return tf_default_data_collator(features)

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in torch_default_data_collator(features)
     80
     81 if not isinstance(features[0], (dict, BatchEncoding)):
---> 82 features = [vars(f) for f in features]
     83 first = features[0]
     84 batch = {}

~/anaconda3/envs/pytorch_latest_p37/lib/python3.7/site-packages/transformers/data/data_collator.py in <listcomp>(.0)
     80
     81 if not isinstance(features[0], (dict, BatchEncoding)):
---> 82 features = [vars(f) for f in features]
     83 first = features[0]
     84 batch = {}

TypeError: vars() argument must have __dict__ attribute
Any clue where I’m going wrong?