When running the following code:
from datasets import load_dataset
dataset = load_dataset("EleutherAI/pile")
I get the following error:
Traceback (most recent call last):
File "/home/lucas/path_dependence_lmc/main.py", line 14, in <module>
pile_dataset = load_dataset("EleutherAI/pile", split='test')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/load.py", line 2153, in load_dataset
builder_instance.download_and_prepare(
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/builder.py", line 954, in download_and_prepare
self._download_and_prepare(
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/builder.py", line 1717, in _download_and_prepare
super()._download_and_prepare(
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/builder.py", line 1027, in _download_and_prepare
split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.cache/huggingface/modules/datasets_modules/datasets/EleutherAI--pile/ebea56d358e91cf4d37b0fde361d563bed1472fbd8221a21b38fc8bb4ba554fb/pile.py", line 192, in _split_generators
data_dir = dl_manager.download(_DATA_URLS[self.config.name])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/download/download_manager.py", line 428, in download
downloaded_path_or_paths = map_nested(
^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 464, in map_nested
mapped = [
^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 465, in <listcomp>
_single_map_nested((function, obj, types, None, True, None))
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 384, in _single_map_nested
mapped = [_single_map_nested((function, v, types, None, True, None)) for v in pbar]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 384, in <listcomp>
mapped = [_single_map_nested((function, v, types, None, True, None)) for v in pbar]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 367, in _single_map_nested
return function(data_struct)
^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/download/download_manager.py", line 454, in _download
return cached_path(url_or_filename, download_config=download_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/file_utils.py", line 182, in cached_path
output_path = get_from_cache(
^^^^^^^^^^^^^^^
File "/home/lucas/.pyenv/versions/3.11.0/lib/python3.11/site-packages/datasets/utils/file_utils.py", line 596, in get_from_cache
raise FileNotFoundError(f"Couldn't find file at {url}")
FileNotFoundError: Couldn't find file at https://the-eye.eu/public/AI/pile/train/00.jsonl.zst