dkleine
August 27, 2025, 11:29pm
10
@Nick00243 @ewainwright
`is_tokenizers_available` is looking for the `tokenizers` package:
except importlib.metadata.PackageNotFoundError:
_sklearn_available = False
_smdistributed_available = importlib.util.find_spec("smdistributed") is not None
_soundfile_available = _is_package_available("soundfile")
_spacy_available = _is_package_available("spacy")
_sudachipy_available, _sudachipy_version = _is_package_available("sudachipy", return_version=True)
_tensorflow_probability_available = _is_package_available("tensorflow_probability")
_tensorflow_text_available = _is_package_available("tensorflow_text")
_tf2onnx_available = _is_package_available("tf2onnx")
_timm_available = _is_package_available("timm")
_tokenizers_available = _is_package_available("tokenizers")
_torchaudio_available = _is_package_available("torchaudio")
_torchao_available, _torchao_version = _is_package_available("torchao", return_version=True)
_torchdistx_available = _is_package_available("torchdistx")
_torchvision_available, _torchvision_version = _is_package_available("torchvision", return_version=True)
_mlx_available = _is_package_available("mlx")
_num2words_available = _is_package_available("num2words")
_hqq_available, _hqq_version = _is_package_available("hqq", return_version=True)
_tiktoken_available = _is_package_available("tiktoken")
_blobfile_available = _is_package_available("blobfile")
_liger_kernel_available = _is_package_available("liger_kernel")
According to the requirements, you would need a specific version (>=0.21,<0.22) of the `tokenizers` package:
"tensorboard",
# TensorFlow pin. When changing this value, update examples/tensorflow/_tests_requirements.txt accordingly
"tensorflow-cpu>2.9,<2.16",
"tensorflow>2.9,<2.16",
"tensorflow-text<2.16",
"tensorflow-probability<0.24",
"tf2onnx",
"timeout-decorator",
"tiktoken",
"timm<=1.0.19,!=1.0.18",
"tokenizers>=0.21,<0.22",
"torch>=2.2",
"torchaudio",
"torchvision",
"pyctcdecode>=0.4.0",
"tqdm>=4.27",
"unidic>=1.0.2",
"unidic_lite>=1.0.7",
"urllib3<2.0.0",
"uvicorn",
"pytest-rich",
Also, the import is done here (for reference):
I don't know why there is an ImportError, but it seems that `tokenizers` is either not installed correctly or is installed in the wrong version. `tokenizers` is an extra defined in the dependencies:
extras["accelerate"] = deps_list("accelerate")
extras["hf_xet"] = deps_list("hf_xet")
if os.name == "nt": # windows
extras["retrieval"] = deps_list("datasets") # faiss is not supported on windows
extras["flax"] = [] # jax is not supported on windows
else:
extras["retrieval"] = deps_list("faiss-cpu", "datasets")
extras["flax"] = deps_list("jax", "jaxlib", "flax", "optax", "scipy")
extras["tokenizers"] = deps_list("tokenizers")
extras["ftfy"] = deps_list("ftfy")
extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
extras["onnx"] = deps_list("onnxconverter-common", "tf2onnx") + extras["onnxruntime"]
extras["modelcreation"] = deps_list("cookiecutter")
extras["sagemaker"] = deps_list("sagemaker")
extras["deepspeed"] = deps_list("deepspeed") + extras["accelerate"]
extras["optuna"] = deps_list("optuna")
extras["ray"] = deps_list("ray[tune]")
extras["sigopt"] = deps_list("sigopt")
… and should be installed along with transformers:
# when modifying the following list, make sure to update src/transformers/dependency_versions_check.py
install_requires = [
deps["filelock"], # filesystem locks, e.g., to prevent parallel downloads
deps["huggingface-hub"],
deps["numpy"],
deps["packaging"], # utilities from PyPA to e.g., compare versions
deps["pyyaml"], # used for the model cards metadata
deps["regex"], # for OpenAI GPT
deps["requests"], # for downloading models over HTTPS
deps["tokenizers"],
deps["safetensors"],
deps["tqdm"], # progress bars in model download and training scripts
]
setup(
name="transformers",
version="4.56.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
As an idea, if you already have transformers installed (but please test this in a separate environment first to check that it works for you):
With pip, you could try to force-reinstall transformers (this should include the tokenizers extra), e.g. `pip install --force-reinstall transformers`:
1 Like