Hi, beginner here. I was fine-tuning BertForSequenceClassification using this code:
def train(model, optimizer, critertion=None, train_loader=None,
          valid_loader=None, num_epochs=5, eval_every=None,
          file_path="", best_valid_loss=float("inf")):
    """Fine-tune *model* on batches of ``((labels, title, text, titletext), _)``.

    Args:
        model: module called as ``model(titletext, labels)`` returning a tuple
            whose first element is the loss (e.g. a wrapper around
            ``BertForSequenceClassification``).
        optimizer: torch optimizer built over ``model.parameters()``.
        critertion: unused; kept (typo included) for backward compatibility —
            BertForSequenceClassification computes CrossEntropyLoss internally.
        train_loader / valid_loader: batch iterators; default to the
            module-level ``train_iter`` / ``valid_iter`` when omitted.
        num_epochs: number of passes over ``train_loader``.
        eval_every: steps between validation runs; defaults to
            ``len(train_loader) // 2``.
        file_path / best_valid_loss: checkpointing state for the (removed)
            validation/saving section.

    NOTE(review): ``CrossEntropyLoss`` requires ``labels`` to hold class
    indices in ``[0, num_labels)``.  The reported
    ``IndexError: Target 5213 is out of bounds`` means the label tensor
    contains far larger values — almost certainly vocabulary token ids from a
    swapped field order when the batch is unpacked.  Verify the dataset really
    yields fields in the order ``(labels, title, text, titletext)``.
    """
    # Resolve defaults lazily so merely defining this function does not
    # require the module-level iterators to exist (the original evaluated
    # them — and an unused nn.BCELoss() — at def time).
    if train_loader is None:
        train_loader = train_iter
    if valid_loader is None:
        valid_loader = valid_iter
    if eval_every is None:
        eval_every = len(train_loader) // 2

    # Train on whatever device the model already lives on (the original read
    # a module-level ``device`` global instead).
    device = next(model.parameters()).device

    # Running values for loss tracking / the removed validation section.
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []

    model.train()
    for epoch in range(num_epochs):
        for (labels, title, text, titletext), _ in train_loader:
            # CrossEntropyLoss expects integer class indices; the inputs are
            # token ids, so both tensors must be LongTensors on `device`.
            labels = labels.type(torch.LongTensor).to(device)
            titletext = titletext.type(torch.LongTensor).to(device)

            # The model returns (loss, logits); only the loss is needed here.
            loss, _ = model(titletext, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            global_step += 1
    # (Validation / checkpointing section removed by the author; the reported
    # error occurs inside this training loop.)
But when I run the code, it shows me the following error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-63-e4474bff9c36> in <module>
2 optimizer = optim.Adam(model.parameters(), lr=2e-5)
3
----> 4 train(model=model, optimizer=optimizer)
<ipython-input-62-e6359dc8788e> in train(model, optimizer, critertion, train_loader, valid_loader, num_epochs, eval_every, file_path, best_valid_loss)
20 print(titletext.shape)
21
---> 22 output = model(titletext, labels)
23 loss, _ = output
24
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-59-3d3782128a40> in forward(self, text, label)
7
8 def forward(self, text, label):
----> 9 loss, text_fea = self.encoder(text, labels=label)[:2]
10
11 return loss, text_fea
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/transformers/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels)
1158 else:
1159 loss_fct = CrossEntropyLoss()
-> 1160 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
1161 outputs = (loss,) + outputs
1162
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/opt/conda/lib/python3.7/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
930 def forward(self, input, target):
931 return F.cross_entropy(input, target, weight=self.weight,
--> 932 ignore_index=self.ignore_index, reduction=self.reduction)
933
934
/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2315 if size_average is not None or reduce is not None:
2316 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2317 return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
2318
2319
/opt/conda/lib/python3.7/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
2113 .format(input.size(0), target.size(0)))
2114 if dim == 2:
-> 2115 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2116 elif dim == 4:
2117 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
IndexError: Target 5213 is out of bounds.
Meanwhile, the shapes of label and titletext are [16] and [16, 128] respectively.
- I tried unsqueezing the label tensor, but that didn't help.
- I also checked whether any target index was missing, and whether the labels and targets differ in length, but neither was the problem.
What can be done to fix this?
Full Code + Dataset : Click here