How to split a dataset into train, test, and validation?

I am having difficulties trying to figure out how I can split my dataset into train, test, and validation. I’ve been going through the documentation here:


and the template here:

but it hasn’t become any clearer.

This is the error I keep getting:
TypeError: 'NoneType' object is not callable

and this is the code I’m using:

def _split_generators(self, dl_manager):
    """Return the train, validation, and test SplitGenerators.

    The title lists referenced by ``_URLS`` are downloaded and read into one
    set of titles per key. All three splits share the same manually
    downloaded data file; each split is distinguished only by the
    ``title_set`` passed in its ``gen_kwargs``.

    Args:
        dl_manager: Download manager providing ``download_and_extract`` and
            ``manual_dir`` (the directory the user supplied via ``data_dir``).

    Returns:
        A list of three ``datasets.SplitGenerator`` objects (TRAIN,
        VALIDATION, TEST), each carrying ``path`` and ``title_set`` in
        ``gen_kwargs``.

    Raises:
        FileNotFoundError: If ``self.config.filename`` is not present in the
            manual directory.
        KeyError: If the downloaded mapping lacks a ``"train"``,
            ``"validation"``, or ``"test"`` entry.
    """
    # download_and_extract is used as a mapping here: split key -> local path
    # of a text file with one title per line.
    dl_path = dl_manager.download_and_extract(_URLS)
    titles = {}
    for k, path in dl_path.items():
        with open(path, encoding="utf-8") as f:
            # Strip surrounding whitespace/newlines; a set drops duplicates.
            titles[k] = {line.strip() for line in f}

    path_to_manual_file = os.path.join(
        os.path.abspath(os.path.expanduser(dl_manager.manual_dir)), self.config.filename
    )

    if not os.path.exists(path_to_manual_file):
        raise FileNotFoundError(
            "{} does not exist. Make sure you insert a manual dir via `datasets.load_dataset('wikihow', data_dir=...)` that includes a file name {}. Manual download instructions: {})".format(
                path_to_manual_file, self.config.filename, self.manual_download_instructions
            )
        )
    # NOTE(review): gen_kwargs are presumably forwarded to _generate_examples
    # as keyword arguments -- confirm against the rest of the script.
    return [
        datasets.SplitGenerator(
            name=datasets.Split.TRAIN,
            gen_kwargs={
                "path": path_to_manual_file,
                "title_set": titles["train"],
            },
        ),
        datasets.SplitGenerator(
            name=datasets.Split.VALIDATION,
            gen_kwargs={
                "path": path_to_manual_file,
                "title_set": titles["validation"],
            },
        ),
        datasets.SplitGenerator(
            name=datasets.Split.TEST,
            gen_kwargs={
                "path": path_to_manual_file,
                "title_set": titles["test"],
            },
        ),
    ]

I think it’s answered here:
How to split main dataset into train, dev, test as DatasetDict

2 Likes