I am trying to load the mozilla-foundation/common_voice_6_0 dataset, but I am getting a JSONDecodeError. The same error occurs with other versions as well.
P.S. I can load the common_voice dataset without any problem.
from datasets import load_dataset
common_voice = load_dataset("mozilla-foundation/common_voice_6_0", "tr", split = "train+test+validation", use_auth_token=True)
Downloading and preparing dataset common_voice/tr to /home/ramil/.cache/huggingface/datasets/mozilla-foundation___common_voice/tr/6.0.0/1f4733442c50d49580aa34c328fae5f022e2e9fde47683da3cd3e71950cf7a6e...
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
File ~/miniconda3/envs/torch110/lib/python3.8/site-packages/requests/models.py:910, in Response.json(self, **kwargs)
909 try:
--> 910 return complexjson.loads(self.text, **kwargs)
911 except JSONDecodeError as e:
912 # Catch JSON-related errors and raise as requests.JSONDecodeError
913 # This aliases json.JSONDecodeError and simplejson.JSONDecodeError
File ~/miniconda3/envs/torch110/lib/python3.8/json/__init__.py:357, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
354 if (cls is None and object_hook is None and
355 parse_int is None and parse_float is None and
356 parse_constant is None and object_pairs_hook is None and not kw):
--> 357 return _default_decoder.decode(s)
358 if cls is None:
File ~/miniconda3/envs/torch110/lib/python3.8/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
333 """Return the Python representation of ``s`` (a ``str`` instance
334 containing a JSON document).
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
File ~/miniconda3/envs/torch110/lib/python3.8/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
During handling of the above exception, another exception occurred:
JSONDecodeError Traceback (most recent call last)
Input In [9], in <module>
1 from datasets import load_dataset, load_metric, Audio
----> 3 common_voice = load_dataset("mozilla-foundation/common_voice_6_0", "tr", split = "train+test+validation", use_auth_token=True)
File ~/miniconda3/envs/torch110/lib/python3.8/site-packages/datasets/load.py:1702, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, script_version, **config_kwargs)
1699 try_from_hf_gcs = path not in _PACKAGED_DATASETS_MODULES
1701 # Download and prepare data
-> 1702 builder_instance.download_and_prepare(
1703 download_config=download_config,
1704 download_mode=download_mode,
1705 ignore_verifications=ignore_verifications,
1706 try_from_hf_gcs=try_from_hf_gcs,
1707 use_auth_token=use_auth_token,
1708 )
1710 # Build dataset for splits
1711 keep_in_memory = (
1712 keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
1713 )
File ~/miniconda3/envs/torch110/lib/python3.8/site-packages/datasets/builder.py:594, in DatasetBuilder.download_and_prepare(self, download_config, download_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, **download_and_prepare_kwargs)
592 logger.warning("HF google storage unreachable. Downloading and preparing it from source")
593 if not downloaded_from_gcs:
--> 594 self._download_and_prepare(
595 dl_manager=dl_manager, verify_infos=verify_infos, **download_and_prepare_kwargs
596 )
597 # Sync info
598 self.info.dataset_size = sum(split.num_bytes for split in self.info.splits.values())
File ~/miniconda3/envs/torch110/lib/python3.8/site-packages/datasets/builder.py:661, in DatasetBuilder._download_and_prepare(self, dl_manager, verify_infos, **prepare_split_kwargs)
659 split_dict = SplitDict(dataset_name=self.name)
660 split_generators_kwargs = self._make_split_generators_kwargs(prepare_split_kwargs)
--> 661 split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
663 # Checksums verification
664 if verify_infos:
File ~/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_6_0/1f4733442c50d49580aa34c328fae5f022e2e9fde47683da3cd3e71950cf7a6e/common_voice_6_0.py:153, in CommonVoice._split_generators(self, dl_manager)
150 dl_manager.download_config.ignore_url_params = True
152 self._log_download(self.config.name, bundle_version, hf_auth_token)
--> 153 archive = dl_manager.download(self._get_bundle_url(self.config.name, bundle_url_template))
155 if self.config.version < datasets.Version("5.0.0"):
156 path_to_data = ""
File ~/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_6_0/1f4733442c50d49580aa34c328fae5f022e2e9fde47683da3cd3e71950cf7a6e/common_voice_6_0.py:132, in CommonVoice._get_bundle_url(self, locale, url_template)
130 path = urllib.parse.quote(path.encode("utf-8"), safe="~()*!.'")
131 use_cdn = self.config.size_bytes < 20 * 1024 * 1024 * 1024
--> 132 response = requests.get(f"{_API_URL}/bucket/dataset/{path}/{use_cdn}", timeout=10.0).json()
133 return response["url"]
File ~/miniconda3/envs/torch110/lib/python3.8/site-packages/requests/models.py:917, in Response.json(self, **kwargs)
915 raise RequestsJSONDecodeError(e.message)
916 else:
--> 917 raise RequestsJSONDecodeError(e.msg, e.doc, e.pos)
JSONDecodeError: [Errno Expecting value] Not Found: 0