I have this code to quantize a large language model and save the quantized model:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = 'stabilityai/stablelm-2-zephyr-1_6b'

def load_quantized_model(model_name: str):
    # 4-bit NF4 quantization with double quantization, computing in bfloat16
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
        trust_remote_code=True
    )
    return model

def initialize_tokenizer(model_name: str):
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    tokenizer.bos_token_id = 1  # Set beginning-of-sentence token id
    return tokenizer

model = load_quantized_model(model_name)
tokenizer = initialize_tokenizer(model_name)

SAVED_MODEL_NAME = 'gpt-custom'
model.save_pretrained(SAVED_MODEL_NAME)
Now I have a 1.2 GB model (down from ~4 GB) inside the gpt-custom directory.
I then downloaded that directory to my CPU-only laptop and ran this code:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = 'stabilityai/stablelm-2-zephyr-1_6b'
SAVED_MODEL_NAME = 'gpt-custom'  # local directory holding the quantized model

def initialize_tokenizer(model_name: str):
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    tokenizer.bos_token_id = 1  # Set beginning-of-sentence token id
    return tokenizer

# Load the quantized model from the local directory
model = AutoModelForCausalLM.from_pretrained(
    SAVED_MODEL_NAME,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = initialize_tokenizer(model_name)

question = 'how are you feeling?'
prompt = [{'role': 'user', 'content': question}]
inputs = tokenizer.apply_chat_template(
    prompt,
    add_generation_prompt=True,
    return_tensors='pt'
)
tokens = model.generate(
    inputs.to(model.device),
    max_new_tokens=64,
    temperature=0.5,
    do_sample=True
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
But I am getting the error NameError: name 'torch' is not defined, even though torch is already installed with pip install torch (and I also tried pip install --upgrade torch). There was also this warning before the error:
Detected the presence of a `quantization_config` attribute in the model's configuration but you don't have the correct `bitsandbytes` version to support 4 and 8 bit serialization. Please install the latest version of `bitsandbytes` with `pip install --upgrade bitsandbytes`.
I have also run pip install --upgrade bitsandbytes, but I still get the same error.
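For completeness, here is a small sanity-check snippet (just a sketch; it only assumes that torch, transformers and bitsandbytes should be importable on the laptop) that I can run with the same interpreter as test.py to see which Python and which package versions the script actually picks up:

import sys
print(sys.executable)  # which Python interpreter is running

import torch
import transformers
print('torch:', torch.__version__)
print('transformers:', transformers.__version__)

try:
    import bitsandbytes as bnb
    print('bitsandbytes:', bnb.__version__)
except Exception as e:
    # bitsandbytes may fail to import on a CPU-only machine
    print('bitsandbytes import failed:', e)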
This is the full error stack:
Traceback (most recent call last):
  File "/var/www/html/test_llm/test.py", line 6, in <module>
    model = AutoModelForCausalLM.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 562, in from_pretrained
    return model_class.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 3856, in from_pretrained
    ) = cls._load_pretrained_model(
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 4290, in _load_pretrained_model
    new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 839, in _load_state_dict_into_meta_model
    set_module_quantized_tensor_to_device(model, param_name, param_device, value=param)
  File "/usr/local/lib/python3.10/dist-packages/transformers/integrations/bitsandbytes.py", line 58, in set_module_quantized_tensor_to_device
    if old_value.device == torch.device("meta") and device not in ["meta", torch.device("meta")] and value is None:
NameError: name 'torch' is not defined