Hi,
I'm trying to build a sentence-pair classifier with a BERT-based model (pre-trained for Hebrew):
from transformers import BertForSequenceClassification

cls_model = BertForSequenceClassification.from_pretrained(
    'onlplab/alephbert-base',
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False,
    return_dict=False
)
I use this tokenizer:
from transformers import BertTokenizerFast

alephbert_tokenizer = BertTokenizerFast.from_pretrained('onlplab/alephbert-base')
As a small running example, I run it on two lists of sentences with a corresponding label for every pair. But I get this error:
IndexError: index out of range in self
I checked that the tokenizer's vocab_size matches the model's, and that the input pairs respect max_length = 512.
Unfortunately, I am stuck. Any help would be highly appreciated.
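For completeness, this is roughly how I ran those checks (a quick sketch; I assume vocab_size and max_position_embeddings are the right config fields to compare):

# Tokenizer and model should agree on the vocabulary size.
print(alephbert_tokenizer.vocab_size == cls_model.config.vocab_size)  # True
# The model's position limit should cover the padded length of 512.
print(cls_model.config.max_position_embeddings)  # 512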
The code I'm running:
sentence_a = ['ืฉื ื ืืืื ืฉืื ืืื','ืฉื ืืื ืฆืืื ืืื','ืืื ื ืฉืืจื?','ืฉืืืฉื ืืืื ืฉืื ืื ืืจ', 'ืืื ืฆืื ืืื']
sentence_b = ['ืืืช ืฉืชืืื ืฉืืืฉ','ืืงืื ืืืื ืืืื','ืจืง ืืฉืื ืืคืืคืื ืืืช ืฉืชืืื ืฉืืืฉ','ืืืืืจื ืืืจ ืืฉืื ืฉืืืฉื ืืืืื', 'ืื ืืืื ืืื ืฉืชื ืืืช ืฉืชืืื ืฉืืืฉ']
import torch
labels = torch.tensor([1, 0, 1, 1, 0])
# Tokenize the sentence pairs.
tokens = alephbert_tokenizer(sentence_a, sentence_b,
                             padding='max_length',
                             max_length=512,
                             truncation=True,
                             return_tensors='pt')
input_ids = tokens['input_ids']
token_type_ids = tokens['token_type_ids']
attention_mask = tokens['attention_mask']
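To show what the tokenizer actually produces for a pair (a quick inspection, in case it helps spot the problem):

print(input_ids.shape)          # torch.Size([5, 512])
print(token_type_ids.unique())  # tensor([0, 1]), where 1 marks the second sentence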
cls_model.train()
cls_model.zero_grad()
# Forward pass: with labels and return_dict=False, the model returns (loss, logits).
loss, logits = cls_model(input_ids=input_ids,            # .squeeze()
                         attention_mask=attention_mask,  # .squeeze()
                         token_type_ids=token_type_ids,  # .squeeze()
                         labels=labels,                  # .squeeze()
                         return_dict=False)
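If it matters, the training step I intend afterwards is along these lines (a sketch, assuming a standard optimizer; AdamW with lr=2e-5 is just a typical fine-tuning choice, not something from my failing run):

from torch.optim import AdamW

optimizer = AdamW(cls_model.parameters(), lr=2e-5)
loss.backward()  # the first element of the returned tuple is the cross-entropy loss
optimizer.step()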
Here is the full traceback:
IndexError Traceback (most recent call last)
Input In [137], in <cell line: 3>()
1 cls_model.zero_grad()
2 # Forward pass.
----> 3 outputs, logits = cls_model(input_ids=input_ids.squeeze(),
4 attention_mask=attention_mask.squeeze(),
5 token_type_ids=token_type_ids.squeeze(),
6 labels=labels.squeeze(),
7 return_dict=False
8 )
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\torch\nn\modules\module.py:889, in Module._call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
892 self._forward_hooks.values()):
893 hook_result = hook(self, input, result)
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\transformers\models\bert\modeling_bert.py:1554, in BertForSequenceClassification.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1546 r"""
1547 labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
1548 Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
1549 config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
1550 `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
1551 """
1552 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-> 1554 outputs = self.bert(
1555 input_ids,
1556 attention_mask=attention_mask,
1557 token_type_ids=token_type_ids,
1558 position_ids=position_ids,
1559 head_mask=head_mask,
1560 inputs_embeds=inputs_embeds,
1561 output_attentions=output_attentions,
1562 output_hidden_states=output_hidden_states,
1563 return_dict=return_dict,
1564 )
1566 pooled_output = outputs[1]
1568 pooled_output = self.dropout(pooled_output)
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\torch\nn\modules\module.py:889, in Module._call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
892 self._forward_hooks.values()):
893 hook_result = hook(self, input, result)
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\transformers\models\bert\modeling_bert.py:1010, in BertModel.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
1003 # Prepare head mask if needed
1004 # 1.0 in head_mask indicate we keep the head
1005 # attention_probs has shape bsz x n_heads x N x N
1006 # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
1007 # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
1008 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
-> 1010 embedding_output = self.embeddings(
1011 input_ids=input_ids,
1012 position_ids=position_ids,
1013 token_type_ids=token_type_ids,
1014 inputs_embeds=inputs_embeds,
1015 past_key_values_length=past_key_values_length,
1016 )
1017 encoder_outputs = self.encoder(
1018 embedding_output,
1019 attention_mask=extended_attention_mask,
(...)
1027 return_dict=return_dict,
1028 )
1029 sequence_output = encoder_outputs[0]
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\torch\nn\modules\module.py:889, in Module._call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
892 self._forward_hooks.values()):
893 hook_result = hook(self, input, result)
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\transformers\models\bert\modeling_bert.py:236, in BertEmbeddings.forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
234 if inputs_embeds is None:
235 inputs_embeds = self.word_embeddings(input_ids)
--> 236 token_type_embeddings = self.token_type_embeddings(token_type_ids)
238 embeddings = inputs_embeds + token_type_embeddings
239 if self.position_embedding_type == "absolute":
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\torch\nn\modules\module.py:889, in Module._call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
892 self._forward_hooks.values()):
893 hook_result = hook(self, input, result)
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py:156, in Embedding.forward(self, input)
155 def forward(self, input: Tensor) -> Tensor:
--> 156 return F.embedding(
157 input, self.weight, self.padding_idx, self.max_norm,
158 self.norm_type, self.scale_grad_by_freq, self.sparse)
File C:\Users\BUDBUDIO\Anaconda3\lib\site-packages\torch\nn\functional.py:1916, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1910 # Note [embedding_renorm set_grad_enabled]
1911 # XXX: equivalent to
1912 # with torch.no_grad():
1913 # torch.embedding_renorm_
1914 # remove once script supports set_grad_enabled
1915 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1916 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self