Within my custom.py script, I need to replace the {SubfolderDatasetName} placeholder with the name of my dataset. How can I pass that dataset name when I call the load_dataset function in my main training script? Looking at the documentation, `**config_kwargs` seems like it might be the answer, but I haven’t been able to find any examples of how to use it.
class CustomConfig(datasets.BuilderConfig):
    """Builder config carrying two extra, user-defined keyword options.

    Any keyword argument other than ``custom_keyword1`` / ``custom_keyword2``
    is forwarded unchanged to ``datasets.BuilderConfig`` (for example
    ``name``, ``version``, ``description``).

    Args:
        custom_keyword1: First custom option. ``None`` is only a stand-in
            default; replace it with whatever default suits your dataset.
        custom_keyword2: Second custom option, same remark as above.
        **kwargs: Remaining keyword arguments for ``datasets.BuilderConfig``.
    """

    def __init__(self, *, custom_keyword1=None, custom_keyword2=None, **kwargs):
        # Keyword-only on purpose: the options are always passed by name.
        # The attributes are always set, even when the caller omits them,
        # so code reading the config never hits a missing attribute.
        self.custom_keyword1 = custom_keyword1
        self.custom_keyword2 = custom_keyword2
        super().__init__(**kwargs)
class CustomDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder template that reads custom options from its config.

    The custom options are attributes of the ``CustomConfig`` instance, not
    of the builder itself, so every method reads them through
    ``self.config``. The method bodies below are placeholders (``...``) to
    be filled in with the real dataset logic.
    """

    # Must be specified so the builder uses the custom config class.
    BUILDER_CONFIG_CLASS = CustomConfig

    # Predefined configs; append further CustomConfig(...) entries as needed.
    BUILDER_CONFIGS = [
        CustomConfig(
            name="custom_config",
            version="1.0.0",
            description="your description",
        ),
    ]

    def _info(self):
        # The custom kwargs are available on self.config in any instance method.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...

    def _split_generators(self, dl_manager):
        # The custom kwargs are available on self.config in any instance method.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...

    def _generate_examples(self, filepaths):
        # The custom kwargs are available on self.config in any instance method.
        print(self.config.custom_keyword1, self.config.custom_keyword2)
        ...