Hi,
I am new to transformers. I am trying to run this on an M1 Mac, using the CPU for simplicity (if you can tell me how to use the M1 GPU, that would be great).
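From what I have read, PyTorch exposes the M1 GPU through the "mps" backend, so I am guessing device selection would look something like this (untested on my side; the device choice is just my assumption from the docs):

import torch

# Guess: Apple-silicon GPUs are exposed as the "mps" backend in recent PyTorch.
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")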
Here is the code for the tokenizer and model:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

# Tokenizers are not torch modules, so no device_map is needed here.
tokenizer = LlamaTokenizer.from_pretrained("chainyo/alpaca-lora-7b")
model = LlamaForCausalLM.from_pretrained(
    "chainyo/alpaca-lora-7b",
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map={"": "cpu"},
    # device_map="auto",
)
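From the traceback below, my guess is that load_in_8bit=True pulls in bitsandbytes, which seems to assume a CUDA build of torch. Is dropping the 8-bit flag the right fix for CPU? Something like this (untested sketch, just to show what I mean by running on CPU):

# Untested guess: plain CPU load without 8-bit quantization, so bitsandbytes is never touched.
model = LlamaForCausalLM.from_pretrained(
    "chainyo/alpaca-lora-7b",
    torch_dtype=torch.float32,  # float16 ops can be unsupported or slow on CPU
    device_map={"": "cpu"},
)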
Then, when I run the following function, this happens:
def reply(tokenizer, model, prompt):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    input_ids = input_ids.to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,  # defined earlier in the script
            return_dict_in_generate=True,
            output_scores=True,
        )
    text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    return text
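For completeness, generation_config is created earlier in the script. A minimal sketch of its shape (the values here are hypothetical placeholders; the real config uses num_beams > 1, which matches the beam_search frames in the traceback below):

from transformers import GenerationConfig

# Hypothetical placeholder values; the point is num_beams > 1,
# which is why generate() dispatches to beam_search in the traceback.
generation_config = GenerationConfig(
    num_beams=4,
    max_new_tokens=128,
)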
The generate call fails with:

AssertionError: Torch not compiled with CUDA enabled

Relevant package versions:
huggingface-hub        0.14.1
langchain              0.0.142
sentence-transformers  2.2.2
torch                  2.0.1
torchvision            0.15.2
transformers           4.29.0
Full error log:
outputs = model.generate(
  File "/.venv/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/generation/utils.py", line 1604, in generate
    return self.beam_search(
  File "/.venv/lib/python3.8/site-packages/transformers/generation/utils.py", line 2902, in beam_search
    outputs = self(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 688, in forward
    outputs = self.model(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 578, in forward
    layer_outputs = decoder_layer(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 293, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 197, in forward
    query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/nn/modules.py", line 388, in forward
    out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 559, in matmul
    return MatMul8bitLt.apply(A, B, out, bias, state)
  File "/.venv/lib/python3.8/site-packages/torch/autograd/function.py", line 506, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 293, in forward
    using_igemmlt = supports_igemmlt(A.device) and not state.force_no_igemmlt
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 226, in supports_igemmlt
    if torch.cuda.get_device_capability(device=device) < (7, 5):
  File "/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py", line 381, in get_device_capability
    prop = get_device_properties(device)
  File "/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py", line 395, in get_device_properties
    _lazy_init()  # will define _get_device_properties
  File "/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py", line 239, in _lazy_init
    raise AssertionError("Torch not compiled with CUDA enabled")
AssertionError: Torch not compiled with CUDA enabled
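If I am reading the traceback right, the chain is generate → beam_search → the bitsandbytes 8-bit linear layer → supports_igemmlt → torch.cuda.get_device_capability, which asserts because my torch build has no CUDA at all. So my working theory is that load_in_8bit only works on CUDA and should be gated on device availability. Is something like this the right approach (untested sketch, just to show what I mean)?

import torch
from transformers import LlamaForCausalLM

# Untested sketch: only request 8-bit weights when a CUDA device exists,
# since bitsandbytes' int8 matmul path queries torch.cuda.get_device_capability().
use_cuda = torch.cuda.is_available()

model = LlamaForCausalLM.from_pretrained(
    "chainyo/alpaca-lora-7b",
    load_in_8bit=use_cuda,
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    device_map={"": "cuda" if use_cuda else "cpu"},
)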