Using config_kwargs with load_dataset

For anyone who comes across this in the future, here’s the GitHub discussion about it: “default config name doesn't work when config kwargs are specified” (Issue #6130 in huggingface/datasets on GitHub).

Passing your custom kwargs to load_dataset:

# `load_dataset` requires the dataset path (a loading script, local directory,
# or Hub repo id) as its first argument; replace the placeholder with yours.
# The extra keyword arguments are forwarded to the builder's config.
ds = datasets.load_dataset(
    "path/to/your_dataset_script.py",
    custom_keyword1=0,
    custom_keyword2=1,
)

Accessing the kwargs passed to load_dataset from inside your dataset loading script:

class CustomConfig(datasets.BuilderConfig):
    """Builder config that carries two user-defined options.

    Args:
        custom_keyword1: First custom option. Defaults to ``None``; replace
            the default with whatever value suits your dataset.
        custom_keyword2: Second custom option. Defaults to ``None``; replace
            the default with whatever value suits your dataset.
        **kwargs: Remaining keyword arguments (e.g. ``name``, ``version``,
            ``description``), forwarded unchanged to ``datasets.BuilderConfig``.
    """

    def __init__(self, *, custom_keyword1=None, custom_keyword2=None, **kwargs):
        # The custom options are named parameters, so they never reach the
        # base-class constructor, which would not accept them.
        super().__init__(**kwargs)
        self.custom_keyword1 = custom_keyword1
        self.custom_keyword2 = custom_keyword2


class CustomDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder that reads its custom options from ``self.config``.

    Keyword arguments given to ``load_dataset`` are used to build the config
    object, not set on the builder itself, so every method reads them through
    ``self.config``.
    """

    # Configs initialization.
    BUILDER_CONFIGS = [
        CustomConfig(name="custom_config", version="1.0.0", description="your description"),
        # Add further CustomConfig(...) entries here as needed.
    ]
    # Must specify this to use the custom config class.
    BUILDER_CONFIG_CLASS = CustomConfig

    def _info(self):
        # The custom kwargs are available on `self.config` in any instance method.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...

    def _split_generators(self, dl_manager):
        # The custom kwargs are available on `self.config` in any instance method.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...

    def _generate_examples(self, filepaths):
        # The custom kwargs are available on `self.config` in any instance method.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...