I have copied the entire output:
AttributeError Traceback (most recent call last)
in <cell line: 4>()
3 inputs = tokenizer("WHat is India?" , return_tensors = "pt").to("cuda")
4 with torch.inference_mode():
----> 5 outputs = model.generate(**inputs, max_new_tokens = 100)
6 completion = tokenizer.batch_decode(outputs, skip_special_tokens = True)[0][len(query):]
7
26 frames
/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py in decorate_context(*args, **kwargs)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
116
117 return decorate_context
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1775
1776 # 13. run sample
--> 1777 return self.sample(
1778 input_ids,
1779 logits_processor=logits_processor,
/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2872
2873 # forward pass to get next token
--> 2874 outputs = self(
2875 **model_inputs,
2876 return_dict=True,
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
--> 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1527 return forward_call(*args, **kwargs)
1528
1529 try:
/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py in new_forward(module, *args, **kwargs)
163 output = module._old_forward(*args, **kwargs)
164 else:
--> 165 output = module._old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py in forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1179
1180 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
--> 1181 outputs = self.model(
1182 input_ids=input_ids,
1183 attention_mask=attention_mask,
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
--> 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1527 return forward_call(*args, **kwargs)
1528
1529 try:
/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py in new_forward(module, *args, **kwargs)
163 output = module._old_forward(*args, **kwargs)
164 else:
--> 165 output = module._old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py in forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
1066 )
1067 else:
--> 1068 layer_outputs = decoder_layer(
1069 hidden_states,
1070 attention_mask=attention_mask,
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
--> 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1527 return forward_call(*args, **kwargs)
1528
1529 try:
/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py in new_forward(module, *args, **kwargs)
163 output = module._old_forward(*args, **kwargs)
164 else:
--> 165 output = module._old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py in forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, **kwargs)
794
795 # Self Attention
--> 796 hidden_states, self_attn_weights, present_key_value = self.self_attn(
797 hidden_states=hidden_states,
798 attention_mask=attention_mask,
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
--> 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1527 return forward_call(*args, **kwargs)
1528
1529 try:
/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py in new_forward(module, *args, **kwargs)
163 output = module._old_forward(*args, **kwargs)
164 else:
--> 165 output = module._old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
/usr/local/lib/python3.10/dist-packages/transformers/models/llama/modeling_llama.py in forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, **kwargs)
384
385 else:
--> 386 query_states = self.q_proj(hidden_states)
387 key_states = self.k_proj(hidden_states)
388 value_states = self.v_proj(hidden_states)
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
1516 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1517 else:
--> 1518 return self._call_impl(*args, **kwargs)
1519
1520 def _call_impl(self, *args, **kwargs):
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1525 or _global_backward_pre_hooks or _global_backward_hooks
1526 or _global_forward_hooks or _global_forward_pre_hooks):
--> 1527 return forward_call(*args, **kwargs)
1528
1529 try:
/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py in new_forward(module, *args, **kwargs)
163 output = module._old_forward(*args, **kwargs)
164 else:
--> 165 output = module._old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
/usr/local/lib/python3.10/dist-packages/bitsandbytes/nn/modules.py in forward(self, x)
448 self.bias.data = self.bias.data.to(x.dtype)
449
--> 450 out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
451
452 if not self.state.has_fp16_weights:
/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py in matmul(A, B, out, state, threshold, bias)
560 if threshold > 0.0:
561 state.threshold = threshold
--> 562 return MatMul8bitLt.apply(A, B, out, bias, state)
563
564
/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py in apply(cls, *args, **kwargs)
537 # See NOTE: [functorch vjp and autograd interaction]
538 args = _functorch.utils.unwrap_dead_wrappers(args)
--> 539 return super().apply(*args, **kwargs) # type: ignore[misc]
540
541 if cls.setup_context == _SingleLevelFunction.setup_context:
/usr/local/lib/python3.10/dist-packages/bitsandbytes/autograd/_functions.py in forward(ctx, A, B, out, bias, state)
342 else:
343 if not state.has_fp16_weights and state.CxB is None and using_igemmlt:
--> 344 state.CxB, state.SB = F.transform(state.CB, to_order=formatB)
345 subA = None
346
/usr/local/lib/python3.10/dist-packages/bitsandbytes/functional.py in transform(A, to_order, from_order, out, transpose, state, ld)
2194
2195 def transform(A, to_order, from_order='row', out=None, transpose=False, state=None, ld=None):
--> 2196 prev_device = pre_call(A.device)
2197 if state is None: state = (A.shape, from_order)
2198 else: from_order = state[1]
AttributeError: 'NoneType' object has no attribute 'device'
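
For reference, here is a minimal sketch of the code that produces this traceback, reconstructed from the output above. Only the generate cell appears in the traceback, so the checkpoint name and the 8-bit loading arguments below are placeholders and assumptions on my part; the bitsandbytes MatMul8bitLt frames suggest the model was loaded in 8-bit with a device map. The error itself is raised inside bitsandbytes when F.transform(state.CB, ...) is called while state.CB is None, which is what produces the 'NoneType' object has no attribute 'device' message.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Placeholder checkpoint: the actual model name is not shown in the traceback.
model_id = "meta-llama/Llama-2-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Assumed loading step: the bitsandbytes frames imply an 8-bit quantized model,
# but this exact configuration is not taken from the output above.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# The generate cell as it appears in the traceback.
query = "WHat is India?"
inputs = tokenizer(query, return_tensors="pt").to("cuda")
with torch.inference_mode():
    outputs = model.generate(**inputs, max_new_tokens=100)
completion = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0][len(query):]
print(completion)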