Hey guys, I want to quantize the btml-3b model to 4 bits. I'm hesitating between AutoGPTQ and bitsandbytes, so I wrote a separate script for each. Could you tell me which one I should use, and whether they would work as written?
AutoGPTQ:
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
import torch
model_id = "C:\Users\William\Documents\STEM.AI\btml-3b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
quantization_config = GPTQConfig(
    bits=4,
    group_size=128,
    dataset="wikitext2",
    desc_act=False,
    tokenizer=tokenizer,  # needed so the wikitext2 calibration samples can be tokenized
)
# calibration on wikitext2 runs here, during from_pretrained
quant_model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config, device_map="auto")
# quick sanity check that q_proj was replaced by a quantized linear
# (this attribute path assumes an OPT-style layout; adjust it to btml-3b's actual module names)
print(quant_model.model.decoder.layers[0].self_attn.q_proj.__dict__)
save_directory = "C:\Users\William\Documents\STEM.AI\btml-3b_GPTQ"
quant_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)
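
To check whether the GPTQ checkpoint actually works, I was planning to reload it and generate a few tokens like this (just a quick sketch, assuming auto-gptq and optimum are installed and a GPU is available; the prompt is only an example):

from transformers import AutoModelForCausalLM, AutoTokenizer

save_directory = r"C:\Users\William\Documents\STEM.AI\btml-3b_GPTQ"
reloaded = AutoModelForCausalLM.from_pretrained(save_directory, device_map="auto")
reloaded_tokenizer = AutoTokenizer.from_pretrained(save_directory)

inputs = reloaded_tokenizer("Hello, my name is", return_tensors="pt").to(reloaded.device)
outputs = reloaded.generate(**inputs, max_new_tokens=20)
print(reloaded_tokenizer.decode(outputs[0], skip_special_tokens=True))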
bitsandbytes:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
model_id = "C:\\Users\\William\\Documents\\STEM.AI\\btml-3b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# NF4 quantization is applied on the fly while the weights are loaded (no calibration dataset)
quant_model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=nf4_config, device_map="auto")
save_directory = "C:\\path_to_save_quantized_model"
quant_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)
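
And the same kind of reload test for the bitsandbytes version (again just a sketch; as far as I understand the quantization config is stored with the checkpoint, so it shouldn't need the BitsAndBytesConfig again, and saving 4-bit bitsandbytes models only works on fairly recent transformers/bitsandbytes releases, but correct me if I'm wrong):

from transformers import AutoModelForCausalLM, AutoTokenizer

save_directory = "C:\\path_to_save_quantized_model"
reloaded = AutoModelForCausalLM.from_pretrained(save_directory, device_map="auto")
reloaded_tokenizer = AutoTokenizer.from_pretrained(save_directory)
# then the same generate() check as in the GPTQ script above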