I organized my dataset in the imagefolder layout like this:
/content/mydataset/
    train/
        real/
            image1.png
            image2.png
        fake/
            image1.png
            image2.png
    validation/
        real/
            image1.png
            image2.png
        fake/
            image1.png
            image2.png
    test/
        real/
            image1.png
            image2.png
        fake/
            image1.png
            image2.png
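As a sanity check on the layout, I can walk the tree and count the PNGs per split/class folder. This is just a pathlib sketch using the paths from the tree above, nothing from the datasets library:

from pathlib import Path

# Sanity-check sketch: count .png files under each split/class folder
# to confirm the on-disk layout matches the tree above.
root = Path("/content/mydataset")
for split_dir in sorted(p for p in root.iterdir() if p.is_dir()):
    for class_dir in sorted(p for p in split_dir.iterdir() if p.is_dir()):
        n_images = len(list(class_dir.glob("*.png")))
        print(f"{split_dir.name}/{class_dir.name}: {n_images} images")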
But when I run this code:
from datasets import load_dataset
dataset = load_dataset("imagefolder", data_dir="/content/mydataset")
I get this error:
ValueError Traceback (most recent call last)
<ipython-input-41-a88e909c5af8> in <cell line: 3>()
1 from datasets import load_dataset
2
----> 3 dataset = load_dataset("imagefolder", data_dir="/content/mydataset")
/usr/local/lib/python3.10/dist-packages/datasets/load.py in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, trust_remote_code, **config_kwargs)
2626 keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
2627 )
-> 2628 ds = builder_instance.as_dataset(split=split, verification_mode=verification_mode, in_memory=keep_in_memory)
2629 # Rename and cast features to match task schema
2630 if task is not None:
/usr/local/lib/python3.10/dist-packages/datasets/builder.py in as_dataset(self, split, run_post_process, verification_mode, ignore_verifications, in_memory)
1266
1267 # Create a dataset for each of the given splits
-> 1268 datasets = map_nested(
1269 partial(
1270 self._build_single_dataset,
/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in map_nested(function, data_struct, dict_only, map_list, map_tuple, map_numpy, num_proc, parallel_min_length, batched, batch_size, types, disable_tqdm, desc)
509 batch_size = max(len(iterable) // num_proc + int(len(iterable) % num_proc > 0), 1)
510 iterable = list(iter_batched(iterable, batch_size))
--> 511 mapped = [
512 _single_map_nested((function, obj, batched, batch_size, types, None, True, None))
513 for obj in hf_tqdm(iterable, disable=disable_tqdm, desc=desc)
/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in <listcomp>(.0)
510 iterable = list(iter_batched(iterable, batch_size))
511 mapped = [
--> 512 _single_map_nested((function, obj, batched, batch_size, types, None, True, None))
513 for obj in hf_tqdm(iterable, disable=disable_tqdm, desc=desc)
514 ]
/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in _single_map_nested(args)
371 return function([data_struct])[0]
372 else:
--> 373 return function(data_struct)
374 if (
375 batched
/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _build_single_dataset(self, split, run_post_process, verification_mode, in_memory)
1296
1297 # Build base dataset
-> 1298 ds = self._as_dataset(
1299 split=split,
1300 in_memory=in_memory,
/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _as_dataset(self, split, in_memory)
1370 if self._check_legacy_cache():
1371 dataset_name = self.name
-> 1372 dataset_kwargs = ArrowReader(cache_dir, self.info).read(
1373 name=dataset_name,
1374 instructions=split,
/usr/local/lib/python3.10/dist-packages/datasets/arrow_reader.py in read(self, name, instructions, split_infos, in_memory)
253 if not files:
254 msg = f'Instruction "{instructions}" corresponds to no data!'
--> 255 raise ValueError(msg)
256 return self.read_files(files=files, original_instructions=instructions, in_memory=in_memory)
257
ValueError: Instruction "train" corresponds to no data!
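In case it helps narrow this down: as a fallback I'd expect loading each split folder on its own to work, roughly like the untested sketch below. It reuses the same imagefolder builder and the paths from above, and assembles the pieces into a DatasetDict by hand:

from datasets import DatasetDict, load_dataset

# Untested sketch: point data_dir at one split folder at a time, so the
# builder only sees the real/ and fake/ class folders; each call then
# yields a single "train" split, which we key by the actual split name.
dataset = DatasetDict({
    split: load_dataset(
        "imagefolder",
        data_dir=f"/content/mydataset/{split}",
        split="train",
    )
    for split in ["train", "validation", "test"]
})

Is there something wrong with my folder structure, or what else could cause this error?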