AssertionError: Torch not compiled with CUDA enabled

Hi,
I am new to Transformers.
I am trying to run the model on an M1 Mac using the CPU for simplicity (if you can tell me how to use the M1 GPU instead, that would be great).
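From what I can tell from the PyTorch docs, the Apple-silicon GPU is exposed as the `mps` backend; this is the device-selection sketch I would expect to work (untested on my side, so treat it as an assumption):

    import torch

    # Prefer the Apple-silicon GPU ("mps") when this PyTorch build supports it,
    # otherwise fall back to the CPU.
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(device)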

Here is the code for the tokenizer and model:

    import torch
    from transformers import LlamaForCausalLM, LlamaTokenizer

    tokenizer = LlamaTokenizer.from_pretrained("chainyo/alpaca-lora-7b", device_map={"": "cpu"})
    model = LlamaForCausalLM.from_pretrained(
        "chainyo/alpaca-lora-7b",
        load_in_8bit=True,           # 8-bit quantization via bitsandbytes
        torch_dtype=torch.float16,
        device_map={"": "cpu"},      # keep everything on the CPU
        # device_map="auto",
    )
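
My understanding is that `load_in_8bit=True` routes the matmuls through bitsandbytes. For reference, this is the non-quantized variant I was planning to fall back to (a sketch, not yet tested; I assume dropping the 8-bit flag avoids the bitsandbytes path entirely):

    # Plain CPU load: full-precision weights, no bitsandbytes involved.
    model = LlamaForCausalLM.from_pretrained(
        "chainyo/alpaca-lora-7b",
        device_map={"": "cpu"},
    )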

Then, when I call the following method, the error below occurs:

    def reply(tokenizer, model, prompt):
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        input_ids = input_ids.to(model.device)  # move inputs to wherever the model lives

        with torch.no_grad():
            outputs = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,  # defined at module level, see below
                return_dict_in_generate=True,
                output_scores=True,
            )
        text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
        return text
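
For completeness, `generation_config` is a module-level `GenerationConfig`. Since the traceback below goes through `beam_search`, it uses multiple beams; the exact values here are placeholders:

    from transformers import GenerationConfig

    # Placeholder values; the real config uses beam search (num_beams > 1),
    # which matches the beam_search frames in the traceback.
    generation_config = GenerationConfig(
        num_beams=4,
        max_new_tokens=128,
    )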

AssertionError: Torch not compiled with CUDA enabled

Relevant package versions:

    huggingface-hub         0.14.1
    langchain               0.0.142
    sentence-transformers   2.2.2
    torch                   2.0.1
    torchvision             0.15.2
    transformers            4.29.0

Full error log:

    outputs = model.generate(
  File "/.venv/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/generation/utils.py", line 1604, in generate
    return self.beam_search(
  File "/.venv/lib/python3.8/site-packages/transformers/generation/utils.py", line 2902, in beam_search
    outputs = self(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 688, in forward
    outputs = self.model(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 578, in forward
    layer_outputs = decoder_layer(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 293, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py", line 197, in forward
    query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
  File "/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/nn/modules.py", line 388, in forward
    out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 559, in matmul
    return MatMul8bitLt.apply(A, B, out, bias, state)
  File "/.venv/lib/python3.8/site-packages/torch/autograd/function.py", line 506, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 293, in forward
    using_igemmlt = supports_igemmlt(A.device) and not state.force_no_igemmlt
  File "/.venv/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py", line 226, in supports_igemmlt
    if torch.cuda.get_device_capability(device=device) < (7, 5):
  File "/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py", line 381, in get_device_capability
    prop = get_device_properties(device)
  File "/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py", line 395, in get_device_properties
    _lazy_init()  # will define _get_device_properties
  File "/.venv/lib/python3.8/site-packages/torch/cuda/__init__.py", line 239, in _lazy_init
    raise AssertionError("Torch not compiled with CUDA enabled")
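
If I read the traceback correctly, the failure comes from bitsandbytes' 8-bit matmul (`MatMul8bitLt`) probing the CUDA device capability regardless of where the tensors actually live, and on a CPU-only PyTorch build that probe itself raises. A minimal sketch of the same failing call, per the last frames of the log (my interpretation, not verified against the bitsandbytes source):

    import torch

    # On a PyTorch build without CUDA support, this raises
    # "AssertionError: Torch not compiled with CUDA enabled",
    # matching the bottom of the traceback above.
    torch.cuda.get_device_capability(device=torch.device("cpu"))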