I'm trying to fine-tune the CLIP model on my private dataset. I have written a custom `Dataset` for it, and the data loading works well, but when I try to train the model with `Trainer`, it raises `IndexError: index out of range in self`.
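For context, the `model` and `processor` referenced below are loaded roughly like this (a minimal sketch; the checkpoint name here is just a stand-in for my own):

```
from transformers import CLIPModel, CLIPProcessor

# Stand-in checkpoint; any CLIP checkpoint reproduces the same setup.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
```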
## Custom Dataset
```
import albumentations as A
import pandas as pd
import torch
from albumentations.pytorch import ToTensorV2
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset


class ClassifierDataset(Dataset):
    def __init__(self, processor, folds: int, mode="train"):
        super(ClassifierDataset, self).__init__()
        self.mode = mode
        self.fold = folds
        self.processor = processor
        self.max_target_length = 32
        # Augmentations applied to the training fold.
        self.train_transform = A.Compose(
            [
                A.CLAHE(),
                A.RandomRotate90(),
                A.Transpose(),
                A.ShiftScaleRotate(
                    shift_limit=0.0625, scale_limit=0.50, rotate_limit=45, p=0.75
                ),
                A.Blur(blur_limit=3),
                A.OpticalDistortion(),
                A.GridDistortion(),
                A.HueSaturationValue(),
                A.VerticalFlip(),
                A.HorizontalFlip(),
                A.Normalize(),
                ToTensorV2(),
            ]
        )
        self.val_transform = A.Compose(
            [
                A.Normalize(),
                ToTensorV2(),
            ]
        )
        # Split the dataframe into train/validation by fold index.
        self.df = pd.read_csv(str(Path.cwd() / "data_new.csv"))
        self.train_df = self.df[self.df.folds != self.fold].reset_index(drop=True)
        self.val_df = self.df[self.df.folds == self.fold].reset_index(drop=True)
        self.dff = self.train_df if self.mode == "train" else self.val_df

    def __len__(self):
        return len(self.dff)

    def __getitem__(self, index):
        img_name = self.dff.loc[index, "paths"]
        text = self.dff.loc[index, "corrected_prompts"]
        img = Image.open(img_name).convert("RGB")
        pixel_values = self.processor.feature_extractor(img, return_tensors="pt").pixel_values
        labels = self.processor.tokenizer(
            text,
            padding="max_length",
            max_length=77,
            truncation=True,
        ).input_ids
        # Mask out padding tokens so they are ignored by the loss.
        labels = [label if label != self.processor.tokenizer.pad_token_id else -100 for label in labels]
        return {"input_ids": torch.tensor(labels), "pixel_values": pixel_values.squeeze()}
```
## Trainer
```
from transformers import DefaultDataCollator, Trainer, TrainingArguments

default_data_collator = DefaultDataCollator()
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    evaluation_strategy="epoch",
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    data_collator=default_data_collator,
)
trainer.train()
```
## The Error Stack Trace
```
<cell line: 22>:22
    trainer.train()
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/trainer.py:1543 in train
    return inner_training_loop(
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/trainer.py:1791 in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/trainer.py:2539 in training_step
    loss = self.compute_loss(model, inputs)
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/trainer.py:2571 in compute_loss
    outputs = model(**inputs)
/opt/conda/envs/blip/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in _call_impl
    return forward_call(*input, **kwargs)
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:1125 in forward
    text_outputs = self.text_model(
/opt/conda/envs/blip/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in _call_impl
    return forward_call(*input, **kwargs)
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:712 in forward
    hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
/opt/conda/envs/blip/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in _call_impl
    return forward_call(*input, **kwargs)
/opt/conda/envs/blip/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:227 in forward
    inputs_embeds = self.token_embedding(input_ids)
/opt/conda/envs/blip/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in _call_impl
    return forward_call(*input, **kwargs)
/opt/conda/envs/blip/lib/python3.8/site-packages/torch/nn/modules/sparse.py:158 in forward
    return F.embedding(
/opt/conda/envs/blip/lib/python3.8/site-packages/torch/nn/functional.py:2183 in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

IndexError: index out of range in self
```
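Since the failure is in the token embedding lookup, a single collated batch can be inspected like this to see which token ids actually reach the embedding layer (a sketch, using the dataset and collator above):

```
from torch.utils.data import DataLoader

# The embedding lookup fails for any id outside [0, vocab_size),
# so the min/max of input_ids is the first thing to check.
loader = DataLoader(train_data, batch_size=2, collate_fn=default_data_collator)
batch = next(iter(loader))
print(batch["input_ids"].min(), batch["input_ids"].max())
print(model.text_model.embeddings.token_embedding.num_embeddings)  # vocab size
```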