The following worked for me until a few days ago, but now it fails, even after clearing the cache.
# Minimal reproduction: log in to the Hugging Face Hub, then load a single
# configuration of the VAANI dataset.
from datasets import load_dataset  # singular; `load_datasets` is not exported by `datasets`
from huggingface_hub import login

login("...")  # "..." is a placeholder for the access token
ds = load_dataset("ARTPARK-IISc/VAANI", "Goa_NorthSouthGoa")
I get the error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/load.py", line 2129, in load_dataset
builder_instance = load_dataset_builder(
^^^^^^^^^^^^^^^^^^^^^
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/load.py", line 1849, in load_dataset_builder
dataset_module = dataset_module_factory(
^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/load.py", line 1731, in dataset_module_factory
raise e1 from None
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/load.py", line 1696, in dataset_module_factory
).get_module()
^^^^^^^^^^^^
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/load.py", line 1100, in get_module
builder_configs, default_config_name = create_builder_configs_from_metadata_configs(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/load.py", line 664, in create_builder_configs_from_metadata_configs
builder_config_cls(
File "<string>", line 12, in __init__
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/packaged_modules/parquet/parquet.py", line 26, in __post_init__
super().__post_init__()
File "/mnt/51eb0667-f71d-4fe0-a83e-beaff24c04fb/anaconda/envs/vaanienv/lib/python3.12/site-packages/datasets/builder.py", line 125, in __post_init__
raise InvalidConfigName(
datasets.builder.InvalidConfigName: Bad characters from black list '<>:/\|?*' found in 'audio/AndhraPradesh/Guntur'. They could create issues when creating a directory for this config on Windows filesystem.
I am using datasets==3.2.0.