Goal: I want to extract the [CLS] token representations, but I am getting an error when I call DistilBertModel.
My code:
import transformers as ppb
model_class, tokenizer_class, pretrained_weights = (ppb.DistilBertModel, ppb.DistilBertTokenizerFast, 'distilbert-base-uncased')
tokenizer = tokenizer_class.from_pretrained(pretrained_weights, cache_dir=<path>)
model = model_class.from_pretrained(pretrained_weights, from_tf=True, cache_dir=<path>)
# [beginning of EDIT]
def my_encode(tokenizer, texts, max_length=MAX_LENGTH):
    inputs = tokenizer.batch_encode_plus(
        texts,
        max_length=max_length,
        padding='longest',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors="pt"
    )
    return inputs
tokenizer_output = my_encode(tokenizer, pandas_df['raw_text'].tolist())
# [end of EDIT]
I am getting an error when I call the model with 'tokenizer_output', which is a 'transformers.tokenization_utils_base.BatchEncoding':
result = model(**tokenizer_output)
The error is:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<timed exec> in <module>
~/miniconda3/envs/x/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~/miniconda3/envs/x/lib/python3.7/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)
485 output_attentions=output_attentions,
486 output_hidden_states=output_hidden_states,
--> 487 return_dict=return_dict,
488 )
489
~/miniconda3/envs/x/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~/miniconda3/envs/x/lib/python3.7/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, x, attn_mask, head_mask, output_attentions, output_hidden_states, return_dict)
305
306 layer_outputs = layer_module(
--> 307 x=hidden_state, attn_mask=attn_mask, head_mask=head_mask[i], output_attentions=output_attentions
308 )
309 hidden_state = layer_outputs[-1]
~/miniconda3/envs/x/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~/miniconda3/envs/x/lib/python3.7/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, x, attn_mask, head_mask, output_attentions)
262
263 # Feed Forward Network
--> 264 ffn_output = self.ffn(sa_output) # (bs, seq_length, dim)
265 ffn_output = self.output_layer_norm(ffn_output + sa_output) # (bs, seq_length, dim)
266
~/miniconda3/envs/x/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~/miniconda3/envs/x/lib/python3.7/site-packages/transformers/models/distilbert/modeling_distilbert.py in forward(self, input)
213
214 def forward(self, input):
--> 215 return apply_chunking_to_forward(self.ff_chunk, self.chunk_size_feed_forward, self.seq_len_dim, input)
216
217 def ff_chunk(self, input):
~/miniconda3/envs/x/lib/python3.7/site-packages/transformers/modeling_utils.py in apply_chunking_to_forward(forward_fn, chunk_size, chunk_dim, *input_tensors)
1815 return torch.cat(output_chunks, dim=chunk_dim)
1816
--> 1817 return forward_fn(*input_tensors)
~/miniconda3/envs/x/lib/python3.7/site-packages/transformers/models/distilbert/modeling_distilbert.py in ff_chunk(self, input)
217 def ff_chunk(self, input):
218 x = self.lin1(input)
--> 219 x = self.activation(x)
220 x = self.lin2(x)
221 x = self.dropout(x)
~/miniconda3/envs/x/lib/python3.7/site-packages/torch/nn/functional.py in gelu(input)
1457 if has_torch_function_unary(input):
1458 return handle_torch_function(gelu, (input,), input)
-> 1459 return torch._C._nn.gelu(input)
1460
1461
RuntimeError: [enforce fail at CPUAllocator.cpp:67] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 2826240000 bytes. Error code 12 (Cannot allocate memory)
I believe calling the model in batches would solve my problem, but how can I use a batch_size here?
result = model(**tokenizer_output)
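Something like the sketch below is what I have in mind, but I am not sure it is right (the batch_size value and the last_hidden_state[:, 0, :] indexing for [CLS] are my own assumptions, not something from my working code):

import torch

batch_size = 32  # assumed value, not from my original code
cls_vectors = []
model.eval()
with torch.no_grad():
    input_ids = tokenizer_output['input_ids']
    attention_mask = tokenizer_output['attention_mask']
    for start in range(0, input_ids.size(0), batch_size):
        batch_output = model(
            input_ids=input_ids[start:start + batch_size],
            attention_mask=attention_mask[start:start + batch_size],
        )
        # take the hidden state of the first token ([CLS]) of each sequence
        cls_vectors.append(batch_output.last_hidden_state[:, 0, :])
cls_vectors = torch.cat(cls_vectors, dim=0)  # (num_texts, hidden_dim)

My thinking is that a smaller batch keeps the intermediate (bs, seq_length, dim) activations small enough to fit in memory, but I don't know if this is the idiomatic way to do it with transformers.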
Is there another way to get the [CLS] token representation?
Thanks in advance!