KeyError: 'marketplace' while downloading "amazon_us_reviews" dataset

I am trying to download the amazon_us_reviews dataset using the following code:

from datasets import load_dataset
dataset = load_dataset("amazon_us_reviews", "Toys_v1_00")

I am getting the following error:

KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _prepare_split_single(self, gen_kwargs, fpath, file_format, max_shard_size, split_info, check_duplicate_keys, job_id)
   1692         )
-> 1693         self._beam_writers[split_name] = beam_writer
   1694 

[... 11 intermediate frames collapsed by Colab ...]
/usr/local/lib/python3.10/dist-packages/datasets/features/features.py in encode_example(self, example)
   1850         ```
-> 1851         """
   1852         return copy.deepcopy(self)

/usr/local/lib/python3.10/dist-packages/datasets/features/features.py in encode_nested_example(schema, obj, level)
   1228     """Decode a nested example.
-> 1229     This is used since some features (in particular Audio and Image) have some logic during decoding.
   1230 

/usr/local/lib/python3.10/dist-packages/datasets/features/features.py in <dictcomp>(.0)
   1228     """Decode a nested example.
-> 1229     This is used since some features (in particular Audio and Image) have some logic during decoding.
   1230 

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in zip_dict(*dicts)
    321     def __get__(self, obj, objtype=None):
--> 322         return self.fget.__get__(None, objtype)()
    323 

/usr/local/lib/python3.10/dist-packages/datasets/utils/py_utils.py in <genexpr>(.0)
    321     def __get__(self, obj, objtype=None):
--> 322         return self.fget.__get__(None, objtype)()
    323 

KeyError: 'marketplace'

The above exception was the direct cause of the following exception:

DatasetGenerationError                    Traceback (most recent call last)
<ipython-input-25-341913a5da6a> in <cell line: 1>()
----> 1 dataset = load_dataset("amazon_us_reviews", "Toys_v1_00")

/usr/local/lib/python3.10/dist-packages/datasets/load.py in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, token, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in download_and_prepare(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)
    952         with FileLock(lock_path) if is_local else contextlib.nullcontext():
    953             self.info.write_to_directory(self._output_dir, fs=self._fs)
--> 954 
    955     def _save_infos(self):
    956         is_local = not is_remote_filesystem(self._fs)

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _download_and_prepare(self, dl_manager, verification_mode, **prepare_splits_kwargs)

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _download_and_prepare(self, dl_manager, verification_mode, **prepare_split_kwargs)
   1047             in_memory=in_memory,
   1048         )
-> 1049         if run_post_process:
   1050             for resource_file_name in self._post_processing_resources(split).values():
   1051                 if os.sep in resource_file_name:

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _prepare_split(self, split_generator, check_duplicate_keys, file_format, num_proc, max_shard_size)
   1553 
   1554 class BeamBasedBuilder(DatasetBuilder):
-> 1555     """Beam based Builder."""
   1556 
   1557     # BeamBasedBuilder does not have dummy data for tests yet

/usr/local/lib/python3.10/dist-packages/datasets/builder.py in _prepare_split_single(self, gen_kwargs, fpath, file_format, max_shard_size, split_info, check_duplicate_keys, job_id)

DatasetGenerationError: An error occurred while generating the dataset

I also tried load_dataset_builder, and it shows the following features:

from datasets import load_dataset_builder
ds_builder = load_dataset_builder("amazon_us_reviews", "Toys_v1_00")
print(ds_builder.info.features)

Output:

{'marketplace': Value(dtype='string', id=None),
 'customer_id': Value(dtype='string', id=None),
 'review_id': Value(dtype='string', id=None),
 'product_id': Value(dtype='string', id=None),
 'product_parent': Value(dtype='string', id=None),
 'product_title': Value(dtype='string', id=None),
 'product_category': Value(dtype='string', id=None),
 'star_rating': Value(dtype='int32', id=None),
 'helpful_votes': Value(dtype='int32', id=None),
 'total_votes': Value(dtype='int32', id=None),
 'vine': ClassLabel(names=['N', 'Y'], id=None),
 'verified_purchase': ClassLabel(names=['N', 'Y'], id=None),
 'review_headline': Value(dtype='string', id=None),
 'review_body': Value(dtype='string', id=None),
 'review_date': Value(dtype='string', id=None)}

The datasets version I am using is 2.14.4.

Is this the correct way to download the dataset? Kindly advise.

Hi, the data is not available anymore.

We contacted the authors and unfortunately they reported that Amazon has decided to stop distributing this dataset.
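In other words, the failure is on the data side rather than in the call itself: the loading script can no longer retrieve the files it expects, so dataset generation aborts. If you want a notebook to degrade gracefully instead of stopping on this error, you can catch DatasetGenerationError around the call. This is only a minimal sketch, assuming datasets 2.14.x, where the exception is importable from datasets.builder (the same module that raises it in the traceback above):

from datasets import load_dataset
from datasets.builder import DatasetGenerationError  # import path in datasets 2.14.x

try:
    dataset = load_dataset("amazon_us_reviews", "Toys_v1_00")
except DatasetGenerationError as err:
    # Generation fails because the source files the loading script tries to
    # download are no longer distributed by Amazon, so no split can be built.
    print(f"amazon_us_reviews could not be built: {err}")
    dataset = None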