Hello HF community! I am working on a dataset loading script, and I want to load my dataset in four parts (train, validation, test, and all). But for the “all” part I get this error: ValueError: `all`
is a special split keyword corresponding to the union of all splits, so cannot be used as key in ._split_generator(). How can I solve this? Here is my code:
def _split_generators(self, dl_manager):
    """Download every data part and declare the splits of this dataset.

    For each part, the annotation archive listed in ``json_URLS`` is
    downloaded and extracted, and the image archive listed in ``img_URLS``
    is downloaded and opened lazily with ``dl_manager.iter_archive``.

    The complete dataset is registered under the custom split name
    ``"full"``. ``datasets.Split.ALL`` must not be used here: ``"all"`` is
    the reserved keyword for the union of all splits, and returning it from
    this method makes the builder raise ``ValueError``.

    Args:
        dl_manager: Download manager handed in by the dataset builder. It
            must provide ``download_and_extract``, ``download`` and
            ``iter_archive``.

    Returns:
        A list of four ``datasets.SplitGenerator`` objects, in the order
        train, validation, test, full. Each one carries ``filepath`` (path
        of the JSONL file inside the extracted archive) and ``images`` (the
        iterator over the image archive) in its ``gen_kwargs``.
    """
    # (split name, part key). The part key indexes both URL tables and is
    # also the prefix of the JSONL file inside the extracted archive.
    parts = [
        (datasets.Split.TRAIN, "train"),
        (datasets.Split.VALIDATION, "val"),
        (datasets.Split.TEST, "test"),
        # Custom split name instead of the reserved "all" keyword.
        # NOTE(review): if the "dataset" archive is the union of the three
        # parts above, loading with split="all" would return those examples
        # twice -- confirm this is acceptable for consumers.
        (datasets.Split("full"), "dataset"),
    ]

    generators = []
    for split_name, key in parts:
        json_dir = dl_manager.download_and_extract(json_URLS[key])
        image_archive = dl_manager.download(img_URLS[key])
        generators.append(
            datasets.SplitGenerator(
                name=split_name,
                gen_kwargs={
                    "filepath": f"{json_dir}/{key}-{projectID}.jsonl",
                    "images": dl_manager.iter_archive(image_archive),
                },
            )
        )
    return generators