Cannot use load_dataset in Google Colab

Hi all,

I was using Google Colab to test my code when I suddenly found that I can no longer load a dataset.
The code is pretty simple, and I have already logged in to the Hub from the notebook (the usual login step is sketched below). What should I do now?
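For reference, this is roughly how I log in (a sketch using the standard huggingface_hub helper; it assumes you have a Hub access token at hand):

from huggingface_hub import notebook_login

# opens a widget in the notebook; paste a Hub access token when prompted
notebook_login()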

from datasets import load_dataset

# based on config
raw_datasets = load_dataset("HuggingFaceH4/ultrachat_200k")

TypeError Traceback (most recent call last)
in <cell line: 4>()
2
3 # based on config
----> 4 raw_datasets = load_dataset("HuggingFaceH4/ultrachat_200k")

18 frames
/usr/local/lib/python3.10/dist-packages/datasets/load.py in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, trust_remote_code, **config_kwargs)
2607
2608 # Download and prepare data
-> 2609 builder_instance.download_and_prepare(
2610 download_config=download_config,
2611 download_mode=download_mode,

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in download_and_prepare(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)
1025 if num_proc is not None:
1026 prepare_split_kwargs["num_proc"] = num_proc
-> 1027 self._download_and_prepare(
1028 dl_manager=dl_manager,
1029 verification_mode=verification_mode,

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _download_and_prepare(self, dl_manager, verification_mode, **prepare_split_kwargs)
1098 split_dict = SplitDict(dataset_name=self.dataset_name)
1099 split_generators_kwargs = self._make_split_generators_kwargs(prepare_split_kwargs)
-> 1100 split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
1101
1102 # Checksums verification

/usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/parquet/parquet.py in _split_generators(self, dl_manager)
42 raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}")
43 dl_manager.download_config.extract_on_the_fly = True
---> 44 data_files = dl_manager.download_and_extract(self.config.data_files)
45 if isinstance(data_files, (str, list, tuple)):
46 files = data_files

/usr/local/lib/python3.10/dist-packages/datasets/download/download_manager.py in download_and_extract(self, url_or_urls)
432 extracted_path(s): str, extracted paths of given URL(s).
433 “”"
→ 434 return self.extract(self.download(url_or_urls))
435
436 def get_recorded_sizes_checksums(self):

/usr/local/lib/python3.10/dist-packages/datasets/download/download_manager.py in download(self, url_or_urls)
255 start_time = datetime.now()
256 with stack_multiprocessing_download_progress_bars():
--> 257 downloaded_path_or_paths = map_nested(
258 download_func,
259 url_or_urls,

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in map_nested(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, batched, batch_size, types, disable_tqdm, desc)
506 batch_size = max(len(iterable) // num_proc + int(len(iterable) % num_proc > 0), 1)
507 iterable = list(iter_batched(iterable, batch_size))
--> 508 mapped = [
509 _single_map_nested((function, obj, batched, batch_size, types, None, True, None))
510 for obj in hf_tqdm(iterable, disable=disable_tqdm, desc=desc)

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in <listcomp>(.0)
507 iterable = list(iter_batched(iterable, batch_size))
508 mapped = [
--> 509 _single_map_nested((function, obj, batched, batch_size, types, None, True, None))
510 for obj in hf_tqdm(iterable, disable=disable_tqdm, desc=desc)
511 ]

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in _single_map_nested(args)
394 }
395 else:
--> 396 mapped = [_single_map_nested((function, v, batched, batch_size, types, None, True, None)) for v in pbar]
397 if isinstance(data_struct, list):
398 return mapped

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in <listcomp>(.0)
394 }
395 else:
--> 396 mapped = [_single_map_nested((function, v, batched, batch_size, types, None, True, None)) for v in pbar]
397 if isinstance(data_struct, list):
398 return mapped

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in _single_map_nested(args)
375 and all(not isinstance(v, types) for v in data_struct)
376 ):
--> 377 return [mapped_item for batch in iter_batched(data_struct, batch_size) for mapped_item in function(batch)]
378
379 # Reduce logging to keep things readable in multiprocessing with tqdm

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in <listcomp>(.0)
375 and all(not isinstance(v, types) for v in data_struct)
376 ):
--> 377 return [mapped_item for batch in iter_batched(data_struct, batch_size) for mapped_item in function(batch)]
378
379 # Reduce logging to keep things readable in multiprocessing with tqdm

/usr/local/lib/python3.10/dist-packages/datasets/download/download_manager.py in _download_batched(self, url_or_filenames, download_config)
311 )
312 else:
--> 313 return [
314 self._download_single(url_or_filename, download_config=download_config)
315 for url_or_filename in url_or_filenames

/usr/local/lib/python3.10/dist-packages/datasets/download/download_manager.py in <listcomp>(.0)
312 else:
313 return [
--> 314 self._download_single(url_or_filename, download_config=download_config)
315 for url_or_filename in url_or_filenames
316 ]

/usr/local/lib/python3.10/dist-packages/datasets/download/download_manager.py in _download_single(self, url_or_filename, download_config)
321 # append the relative path to the base_path
322 url_or_filename = url_or_path_join(self._base_path, url_or_filename)
--> 323 out = cached_path(url_or_filename, download_config=download_config)
324 out = tracked_str(out)
325 out.set_origin(url_or_filename)

/usr/local/lib/python3.10/dist-packages/datasets/utils/file_utils.py in cached_path(url_or_filename, download_config, **download_kwargs)
199 if is_remote_url(url_or_filename):
200 # URL, so get it from the cache (downloading if necessary)
--> 201 output_path = get_from_cache(
202 url_or_filename,
203 cache_dir=cache_dir,

/usr/local/lib/python3.10/dist-packages/datasets/utils/file_utils.py in get_from_cache(url, cache_dir, force_download, proxies, etag_timeout, resume_download, user_agent, local_files_only, use_etag, max_retries, token, use_auth_token, ignore_url_params, storage_options, download_desc, disable_tqdm)
674 ftp_get(url, temp_file)
675 elif scheme not in ("http", "https"):
--> 676 fsspec_get(
677 url, temp_file, storage_options=storage_options, desc=download_desc, disable_tqdm=disable_tqdm
678 )

/usr/local/lib/python3.10/dist-packages/datasets/utils/file_utils.py in fsspec_get(url, temp_file, storage_options, desc, disable_tqdm)
383 }
384 )
--> 385 fs.get_file(path, temp_file.name, callback=callback)
386
387

/usr/local/lib/python3.10/dist-packages/huggingface_hub/hf_file_system.py in get_file(self, rpath, lpath, callback, outfile, **kwargs)
632 callback.set_size(expected_size)
633 try:
--> 634 http_get(
635 url=hf_hub_url(
636 repo_id=resolve_remote_path.repo_id,

TypeError: http_get() got an unexpected keyword argument 'displayed_filename'

Same here! It was working fine over the last few days, but now it just throws this error :disappointed_relieved:

Same here. What should I do?

cc @mariosasko @albertvillanova @lhoestq @polinaeterna

Sounds like an issue with huggingface_hub. Can you try to update it?

pip install -U huggingface_hub

Maybe you imported huggingface_hub just before installing datasets and you are still using the old version:

>>> import huggingface_hub
>>> huggingface_hub.__version__
'0.20.3'

You could also try restarting your notebook kernel: Runtime > Restart session
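Putting it together, a minimal Colab sequence might look like this (a sketch; the exact version you end up with depends on what pip resolves at the time):

# run in a Colab cell, then restart the runtime (Runtime > Restart session)
!pip install -U huggingface_hub

# in a fresh cell after the restart, confirm the running session
# is actually using the upgraded package
import huggingface_hub
print(huggingface_hub.__version__)

from datasets import load_dataset
raw_datasets = load_dataset("HuggingFaceH4/ultrachat_200k")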

@lhoestq Thanks a lot for your reply. I tried it again and it works well now.