I am having difficulty figuring out how to split my dataset into train, test, and validation sets. I’ve been going through the documentation here:
and the template here:
but it hasn’t become any clearer.
This is the error I keep getting:
TypeError: 'NoneType' object is not callable
and this is the code I’m using:
def _split_generators(self, dl_manager):
"""Returns SplitGenerators."""
dl_path = dl_manager.download_and_extract(_URLS)
titles = {k: set() for k in dl_path}
for k, path in dl_path.items():
with open(path, encoding="utf-8") as f:
for line in f:
titles[k].add(line.strip())
path_to_manual_file = os.path.join(
os.path.abspath(os.path.expanduser(dl_manager.manual_dir)), self.config.filename
)
if not os.path.exists(path_to_manual_file):
raise FileNotFoundError(
"{} does not exist. Make sure you insert a manual dir via `datasets.load_dataset('wikihow', data_dir=...)` that includes a file name {}. Manual download instructions: {})".format(
path_to_manual_file, self.config.filename, self.manual_download_instructions
)
)
return [
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"path": path_to_manual_file,
"title_set": titles["train"],
},
),
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
gen_kwargs={
"path": path_to_manual_file,
"title_set": titles["validation"],
},
),
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"path": path_to_manual_file,
"title_set": titles["test"],
},
),