For anyone who comes across this in the future, here is the GitHub discussion about it: "default config name doesn't work when config kwargs are specified" (Issue #6130, huggingface/datasets).
Passing your kwargs to load_dataset:
# load_dataset requires the dataset path (or loading script) as its first
# argument; the extra keyword arguments are forwarded to the builder config.
ds = datasets.load_dataset("path/to/your_dataset", custom_keyword1=0, custom_keyword2=1)
Accessing the kwargs that were passed to load_dataset:
class CustomConfig(datasets.BuilderConfig):
    """Builder config that accepts two extra keyword arguments."""

    def __init__(self, **kwargs):
        """Store the custom keywords and forward the rest to ``BuilderConfig``.

        Args:
            **kwargs: May contain ``custom_keyword1`` and ``custom_keyword2``.
                Every other entry is passed on unchanged to the parent
                initializer.
        """
        # Pop the custom keys first so the parent initializer never sees them.
        # Replace ``None`` with your own default values.
        self.custom_keyword1 = kwargs.pop("custom_keyword1", None)
        self.custom_keyword2 = kwargs.pop("custom_keyword2", None)
        super().__init__(**kwargs)
class CustomDataset(datasets.GeneratorBasedBuilder):
    """Example builder that reads custom keyword arguments from its config.

    The custom keyword arguments are stored on the ``CustomConfig`` instance,
    which the builder exposes as ``self.config``; they are not attributes of
    the builder itself.
    """

    BUILDER_CONFIGS = [
        CustomConfig(name="custom_config", version="1.0.0", description="your description"),
        # ... add further configs here
    ]  # Configs initialization
    BUILDER_CONFIG_CLASS = CustomConfig  # Must specify this to use custom config

    def _info(self):
        # The specified kwargs are available through self.config in any
        # instance method of the builder.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...

    def _split_generators(self, dl_manager):
        # Same access pattern as in _info: read the kwargs from self.config.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...

    def _generate_examples(self, filepaths):
        # Same access pattern as in _info: read the kwargs from self.config.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...