Issues with Downloading Llama2 in Jupyter Notebook

I’ve recently been having problems loading some Llama2 models from Hugging Face in a Jupyter Notebook.

Even though I’ve already downloaded them and they’re stored in the specified cache directory, I’ve been unable to load the model; I get the following error about the config.json file not being found.

from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import torch

hf_token = '<my_token>'

login(token=hf_token)

model_name = "meta-llama/Llama-2-7b-hf"
my_cache_dir = "<my_cache_dir>"

cuda_device = torch.device("cuda")

tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=my_cache_dir)
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=my_cache_dir).to(cuda_device)

input_text = "Once upon a time, "
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(cuda_device)

output = model.generate(input_ids, max_length=20, temperature=0.7)

print(output)

OSError                                   Traceback (most recent call last)
Cell In[1], line 15
     11 model_name = "meta-llama/Llama-2-7b-hf"
     13 cuda_device = torch.device("cuda")
---> 15 tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
     16 model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir).to(cuda_device)
     18 input_text = "Once upon a time, "

File ~/.local/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py:819, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    817 if config_tokenizer_class is None:
    818     if not isinstance(config, PretrainedConfig):
--> 819         config = AutoConfig.from_pretrained(
    820             pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    821         )
    822     config_tokenizer_class = config.tokenizer_class
    823     if hasattr(config, "auto_map") and "AutoTokenizer" in config.auto_map:

File ~/.local/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py:928, in AutoConfig.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    925 trust_remote_code = kwargs.pop("trust_remote_code", None)
    926 code_revision = kwargs.pop("code_revision", None)
--> 928 config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
    929 has_remote_code = "auto_map" in config_dict and "AutoConfig" in config_dict["auto_map"]
    930 has_local_code = "model_type" in config_dict and config_dict["model_type"] in CONFIG_MAPPING

File ~/.local/lib/python3.11/site-packages/transformers/configuration_utils.py:631, in PretrainedConfig.get_config_dict(cls, pretrained_model_name_or_path, **kwargs)
    629 original_kwargs = copy.deepcopy(kwargs)
    630 # Get config dict associated with the base config file
--> 631 config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
    632 if "_commit_hash" in config_dict:
    633     original_kwargs["_commit_hash"] = config_dict["_commit_hash"]

File ~/.local/lib/python3.11/site-packages/transformers/configuration_utils.py:686, in PretrainedConfig._get_config_dict(cls, pretrained_model_name_or_path, **kwargs)
    682 configuration_file = kwargs.pop("_configuration_file", CONFIG_NAME)
    684 try:
    685     # Load from local folder or from cache or download from model Hub and cache
--> 686     resolved_config_file = cached_file(
    687         pretrained_model_name_or_path,
    688         configuration_file,
    689         cache_dir=cache_dir,
    690         force_download=force_download,
    691         proxies=proxies,
    692         resume_download=resume_download,
    693         local_files_only=local_files_only,
    694         token=token,
    695         user_agent=user_agent,
    696         revision=revision,
    697         subfolder=subfolder,
    698         _commit_hash=commit_hash,
    699     )
    700     commit_hash = extract_commit_hash(resolved_config_file, commit_hash)
    701 except EnvironmentError:
    702     # Raise any environment error raise by `cached_file`. It will have a helpful error message adapted to
    703     # the original exception.

File ~/.local/lib/python3.11/site-packages/transformers/utils/hub.py:369, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
    367 if not os.path.isfile(resolved_file):
    368     if _raise_exceptions_for_missing_entries:
--> 369         raise EnvironmentError(
    370             f"{path_or_repo_id} does not appear to have a file named {full_filename}. Checkout "
    371             f"'https://huggingface.co/{path_or_repo_id}/tree/{revision}' for available files."
    372         )
    373     else:
    374         return None

OSError: meta-llama/Llama-2-7b-hf does not appear to have a file named config.json. Checkout 'https://huggingface.co/meta-llama/Llama-2-7b-hf/tree/None' for available files.

I don’t get this error when I run the same code as a plain Python script, even though my Jupyter Notebook uses the same environment and Python version.

I also only get this error for the Llama models specifically; I’ve been able to load other models (Mistral, Pythia, GPT-2) successfully.

I’m on transformers version 4.40.1 and Python 3.11.4.
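
For completeness, here is a quick check that can be run in both the notebook and the plain Python script to confirm they really see the same interpreter, library versions, and Hugging Face token (whoami() raises if no valid token is found):

import sys

import huggingface_hub
import transformers
from huggingface_hub import whoami

print("interpreter:", sys.executable)
print("transformers:", transformers.__version__)
print("huggingface_hub:", huggingface_hub.__version__)

# whoami() raises if huggingface_hub cannot find a valid token
print("logged in as:", whoami()["name"])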

Make sure the config.json file for the specific model you’re trying to load is actually present in the cache directory (there’s a quick way to check this further down). If the config.json file exists in the cache directory but is still not being detected, try manually specifying the path to the config file when loading the tokenizer and model:

import os

from transformers import PretrainedConfig, AutoModelForCausalLM, AutoTokenizer

# Manually load the config (note: with the Hub cache layout the file usually lives
# under models--meta-llama--Llama-2-7b-hf/snapshots/<revision>/, not directly in cache_dir)
config_path = os.path.join(my_cache_dir, 'config.json')
config = PretrainedConfig.from_json_file(config_path)

# Load the tokenizer and model with the loaded config
tokenizer = AutoTokenizer.from_pretrained(model_name, config=config, cache_dir=my_cache_dir)
# from_config builds the architecture from the config with freshly initialized weights;
# it does not load the pretrained checkpoint
model = AutoModelForCausalLM.from_config(config).to(cuda_device)
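
To confirm whether config.json is actually present in your local cache for that repo (the first suggestion above), a check along these lines should work; it uses huggingface_hub’s try_to_load_from_cache, and my_cache_dir is the same directory you pass in your code:

from huggingface_hub import try_to_load_from_cache, _CACHED_NO_EXIST

result = try_to_load_from_cache(
    "meta-llama/Llama-2-7b-hf", "config.json", cache_dir=my_cache_dir
)

if isinstance(result, str):
    print("config.json is cached at:", result)
elif result is _CACHED_NO_EXIST:
    # the cache has recorded that this file does not exist on the Hub
    print("config.json is marked as non-existent for this repo")
else:
    # result is None: the file was never downloaded into this cache
    print("config.json has not been downloaded into this cache")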