***** Running training *****
Num examples = 111530
Num Epochs = 3
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 8
Gradient Accumulation steps = 1
Total optimization steps = 41826
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-28-3435b262f1ae> in <module>()
----> 1 trainer.train()
11 frames
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1284 tr_loss += self.training_step(model, inputs)
1285 else:
-> 1286 tr_loss += self.training_step(model, inputs)
1287 self.current_flos += float(self.floating_point_ops(inputs))
1288
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in training_step(self, model, inputs)
1777 loss = self.compute_loss(model, inputs)
1778 else:
-> 1779 loss = self.compute_loss(model, inputs)
1780
1781 if self.args.n_gpu > 1:
/usr/local/lib/python3.7/dist-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
1809 else:
1810 labels = None
-> 1811 outputs = model(**inputs)
1812 # Save past state if it exists
1813 # TODO: this needs to be fixed and made cleaner later.
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, output_attentions, output_hidden_states, return_dict)
1338 output_attentions=output_attentions,
1339 output_hidden_states=output_hidden_states,
-> 1340 return_dict=return_dict,
1341 )
1342
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
987 token_type_ids=token_type_ids,
988 inputs_embeds=inputs_embeds,
--> 989 past_key_values_length=past_key_values_length,
990 )
991 encoder_outputs = self.encoder(
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
213
214 if inputs_embeds is None:
--> 215 inputs_embeds = self.word_embeddings(input_ids)
216 token_type_embeddings = self.token_type_embeddings(token_type_ids)
217
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/sparse.py in forward(self, input)
158 return F.embedding(
159 input, self.weight, self.padding_idx, self.max_norm,
--> 160 self.norm_type, self.scale_grad_by_freq, self.sparse)
161
162 def extra_repr(self) -> str:
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2041 # remove once script supports set_grad_enabled
2042 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2043 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
2044
2045
IndexError: index out of range in self