# Try this and let me know:
"""Download a causal-LM + tokenizer, save them locally, then reload the
checkpoint with accelerate's empty-weights / dispatch machinery so the
weights are sharded across GPU and CPU instead of fully materialized.
"""
import torch
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

checkpoint = "./model_path"

# NOTE(review): the original read the config from ./model_path *before*
# anything was saved there, which fails on a fresh run — load it from the
# hub model id instead.
config = AutoConfig.from_pretrained("tiiuae/falcon-7b-instruct", trust_remote_code=True)
# NOTE(review): attn_config["attn_impl"] = "triton" is an MPT-style option;
# Falcon configs may not expose attn_config at all — guard and confirm
# against the remote config class actually in use.
if hasattr(config, "attn_config"):
    config.attn_config["attn_impl"] = "triton"
config.init_device = "cuda:0"

# First pass: fully download the weights and tokenizer, then persist them
# locally so the second pass can dispatch from disk.
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b-instruct",
    config=config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b", padding_side="left")
tokenizer.pad_token_id = tokenizer.eos_token_id  # GPT-NeoX tokenizer has no pad token
print("downloaded tokenizer")

model.save_pretrained(checkpoint)
tokenizer.save_pretrained(checkpoint)
print("saved tokenizer")

# Second pass: build a weight-less skeleton (meta tensors only), then let
# accelerate stream the checkpoint onto devices per the memory budget.
# (Original had these two lines un-indented, outside the context manager.)
with init_empty_weights():
    config = AutoConfig.from_pretrained(checkpoint, trust_remote_code=True)
    model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)

max_memory = {0: "8GiB", "cpu": "4GiB"}
model = load_checkpoint_and_dispatch(
    model,
    checkpoint,
    device_map="auto",
    max_memory=max_memory,  # was defined but never passed in the original
    dtype=torch.bfloat16,
    offload_folder="offload",
)
# tie_weights() mutates the model in place and returns None — do NOT rebind
# (the original's `model = model.tie_weights()` made `model` None).
model.tie_weights()

tokenizer = AutoTokenizer.from_pretrained(checkpoint, padding_side="left")
print(type(model))