I failed to load_dataset from local directory. I tried the following, what am i doing wrong ?
os.system('mkdir mydataset')
os.system('mkdir mydataset/train')
os.system('mkdir mydataset/train/dog/')
os.system('mkdir mydataset/train/cat/')
im1 = np.arange(20*20*3, dtype=np.uint8).reshape((20,20,3))
plt.imsave('mydataset/train/dog/im1.jpg', im1)
im2 = np.arange(20*20*3, dtype=np.uint8).reshape((20,20,3))
plt.imsave('mydataset/train/dog/im2.jpg', im2)
im3 = np.arange(20*20*3, dtype=np.uint8).reshape((20,20,3))
plt.imsave('mydataset/train/cat/im3.jpg', im3)
im4 = np.arange(20*20*3, dtype=np.uint8).reshape((20,20,3))
plt.imsave('mydataset/train/cat/im4.jpg', im4)
train_dataset = load_dataset("imagefolder", "mydataset")
In any version of load_dataset I tried, I get the same error:
ValueError Traceback (most recent call last)
Cell In[63], line 13
11 im4 = np.arange(20203, dtype=np.uint8).reshape((20,20,3))
12 plt.imsave(‘mydataset/train/cat/im4.jpg’, im4)
—> 13 train_dataset = load_dataset(“imagefolder”, “mydataset”)
File ~/miniconda3/envs/pytr/lib/python3.9/site-packages/datasets/load.py:2523, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, trust_remote_code, config_kwargs)
2518 verification_mode = VerificationMode(
2519 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
2520 )
2522 # Create a dataset builder
→ 2523 builder_instance = load_dataset_builder(
2524 path=path,
2525 name=name,
…
File ~/miniconda3/envs/pytr/lib/python3.9/site-packages/fsspec/utils.py:734, in glob_translate(pat)
732 continue
733 elif "" in part:
→ 734 raise ValueError(
735 “Invalid pattern: ‘**’ can only be an entire path component”
736 )
737 if part:
738 results.extend(_translate(part, f"{not_sep}*", not_sep))
ValueError: Invalid pattern: ‘**’ can only be an entire path component