# Authenticate to the Hugging Face Hub from Google Colab, then load a
# 4-bit-quantized causal LM and its tokenizer.
#
# NOTE(review): the pasted 401 error below means the HF account behind the
# token has NOT been granted access to the gated repo
# meta-llama/Llama-3.1-8B-Instruct. Request access on the model page and
# make sure the Colab secret "HF_TOKEN" holds a valid token for that
# account — the code itself cannot work around a missing grant.
import os

from google.colab import userdata
from huggingface_hub import login

# Read the token from Colab's secrets manager and export it so that any
# library that reads the environment (e.g. transformers) can also see it.
token = userdata.get("HF_TOKEN")
os.environ["HF_TOKEN"] = token

# Read-only login for this session; reuse an existing session if present.
login(token=token, add_to_git_credential=True, new_session=False, write_permission=False)

# NOTE(review): BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM,
# torch, MODEL_NAME and PAD_TOKEN are not defined in this cell — they are
# presumably imported/defined in an earlier cell; confirm before running.
# 4-bit NF4 quantization with bfloat16 compute to fit the model in
# Colab-class GPU memory.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Register a dedicated pad token and pad on the right (the side most
# causal-LM training collators expect — TODO confirm against the trainer).
tokenizer.add_special_tokens({"pad_token": PAD_TOKEN})
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",
)
# Grow the embedding matrix to cover the newly added pad token;
# pad_to_multiple_of=8 keeps the vocab dimension tensor-core friendly.
model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
Error
OSError: You are trying to access a gated repo.
Make sure to have access to it at meta-llama/Llama-3.1-8B-Instruct · Hugging Face.
401 Client Error. (Request ID: Root=1-675c012c-393db96363b1271b2d444e43;53abc514-2119-40ef-9d55-a71e12fcf417)
Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.1-8B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in.