I am trying to use a causal language model from BioGPT, but I ran into a strange error.
Here are my steps:
First, I installed transformers and sacremoses:
!pip install transformers sacremoses -q
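For reference, the model and tokenizer were set up roughly as follows (this sketch assumes the standard microsoft/biogpt checkpoint from the Hugging Face Hub; my setup was equivalent):

import torch
import torch.nn.functional as F
from transformers import BioGptTokenizer, BioGptForCausalLM

# assumed setup: standard BioGPT checkpoint, GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")
model = BioGptForCausalLM.from_pretrained("microsoft/biogpt").to(device)
model.eval()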
Then I executed the following code:
input_sequence = "Hello, I'm a language model,"
inputs = torch.as_tensor(tokenizer.encode(input_sequence)).unsqueeze(0).to(device)
past_key_values = None
count = 0
complete_token = []

with torch.no_grad():
    while count < 10:
        count += 1
        print("Iteration no.: " + str(count))

        # After the first step, feed only the newly sampled token;
        # the cached past_key_values should cover the earlier positions.
        if count > 1:
            inputs = input_token

        model_out = model(input_ids=inputs.to(device), past_key_values=past_key_values)
        logits = model_out.logits[:, -1, :]
        past_key_values = model_out.past_key_values

        # Top-k sampling: softmax over the 5 largest logits gives probabilities
        # (despite the variable name, these are not log-probs), then sample one index.
        topk_values, topk_indices = torch.topk(logits, 5)
        log_probs = F.softmax(topk_values, dim=-1)
        inputs_in_topk = torch.multinomial(log_probs, num_samples=1, replacement=True)
        input_token = torch.gather(topk_indices, 1, inputs_in_topk)
        complete_token.append(input_token)
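Once the loop runs to completion, my plan is to decode the collected ids roughly like this (a sketch only; the loop never gets this far because of the error below):

generated_ids = torch.cat(complete_token, dim=1)  # each entry is a (1, 1) tensor of token ids
print(tokenizer.decode(generated_ids[0]))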
And here is the error I got:
Iteration no.: 1
Iteration no.: 2
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_18990/2689790310.py in <cell line: 8>()
13 inputs = input_token
14
---> 15 model_out = model(input_ids=inputs.to(device), past_key_values=past_key_values)
16 logits = model_out.logits[:, -1, :]
17 past_key_values = model_out.past_key_values
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/transformers/models/biogpt/modeling_biogpt.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, past_key_values, labels, use_cache, output_attentions, output_hidden_states, return_dict)
677 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
678
--> 679 outputs = self.biogpt(
680 input_ids,
681 attention_mask=attention_mask,
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/transformers/models/biogpt/modeling_biogpt.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
589 )
590 else:
--> 591 layer_outputs = decoder_layer(
592 hidden_states,
593 attention_mask=attention_mask,
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/transformers/models/biogpt/modeling_biogpt.py in forward(self, hidden_states, attention_mask, layer_head_mask, past_key_value, output_attentions, use_cache)
313 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
314 # add present self-attn cache to positions 1,2 of present_key_value tuple
--> 315 hidden_states, self_attn_weights, present_key_value = self.self_attn(
316 hidden_states=hidden_states,
317 past_key_value=self_attn_past_key_value,
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/transformers/models/biogpt/modeling_biogpt.py in forward(self, hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask, output_attentions)
211 if attention_mask is not None:
212 if attention_mask.size() != (bsz, 1, tgt_len, src_len):
--> 213 raise ValueError(
214 f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
215 )
ValueError: Attention mask should be of size (1, 1, 0, 12), but is torch.Size([1, 1, 1, 1])
So apparently everything went fine on the first iteration, but the error came up on the second model call, i.e. the first call where only the newly sampled token plus the cached past_key_values is passed in.
Do you know how to fix this?