KeyError: 'length' when loading dataset by load_from_disk

  1. I created and saved the squad_dataset on my Mac using the code below.
    import os
    import torch
    from datasets import load_dataset, load_from_disk
    dataset_path = “./squad_dataset”
    if not os.path.exists(dataset_path):
    squad = load_dataset(“squad”, split=“train[:5000]”)
    squad.save_to_disk(dataset_path)

Then I loaded it to verify that the saved squad_dataset works correctly.
squad = load_from_disk(dataset_path)

2) Then I copied the squad_dataset folder from my Mac to another machine (with a GPU installed) and got the exception "KeyError: 'length'". How can I fix it?

import torch
from datasets import load_dataset
from datasets import load_from_disk
import sys
squad = load_from_disk(“xxxxxx/squad_dataset”)


KeyError Traceback (most recent call last)
in
4 import sys
5
----> 6 squad = load_from_disk(“xxxxx/squad_dataset”)
7
8

/conda/envs/notebook/lib/python3.6/site-packages/datasets/load.py in load_from_disk(dataset_path, fs, keep_in_memory)
1804 raise FileNotFoundError(f"Directory {dataset_path} not found")
1805 if fs.isfile(Path(dest_dataset_path, config.DATASET_INFO_FILENAME).as_posix()):
→ 1806 return Dataset.load_from_disk(dataset_path, fs, keep_in_memory=keep_in_memory)
1807 elif fs.isfile(Path(dest_dataset_path, config.DATASETDICT_JSON_FILENAME).as_posix()):
1808 return DatasetDict.load_from_disk(dataset_path, fs, keep_in_memory=keep_in_memory)

/conda/envs/notebook/lib/python3.6/site-packages/datasets/arrow_dataset.py in load_from_disk(dataset_path, fs, keep_in_memory)
1211 state = json.load(state_file)
1212 with open(Path(dataset_path, config.DATASET_INFO_FILENAME).as_posix(), encoding=“utf-8”) as dataset_info_file:
→ 1213 dataset_info = DatasetInfo.from_dict(json.load(dataset_info_file))
1214
1215 dataset_size = estimate_dataset_size(

/conda/envs/notebook/lib/python3.6/site-packages/datasets/info.py in from_dict(cls, dataset_info_dict)
270 def from_dict(cls, dataset_info_dict: dict) → “DatasetInfo”:
271 field_names = {f.name for f in dataclasses.fields(cls)}
→ 272 return cls(**{k: v for k, v in dataset_info_dict.items() if k in field_names})
273
274 def update(self, other_dataset_info: “DatasetInfo”, ignore_none=True):

in __init__(self, description, citation, homepage, license, features, post_processed, supervised_keys, task_templates, builder_name, config_name, version, splits, download_checksums, download_size, post_processing_size, dataset_size, size_in_bytes)

/conda/envs/notebook/lib/python3.6/site-packages/datasets/info.py in __post_init__(self)
139 # Convert back to the correct classes when we reload from dict
140 if self.features is not None and not isinstance(self.features, Features):
→ 141 self.features = Features.from_dict(self.features)
142 if self.post_processed is not None and not isinstance(self.post_processed, PostProcessedInfo):
143 self.post_processed = PostProcessedInfo.from_dict(self.post_processed)

/conda/envs/notebook/lib/python3.6/site-packages/datasets/features/features.py in from_dict(cls, dic)
1595 {‘_type’: Value(dtype=‘string’, id=None)}
1596 “”"
→ 1597 obj = generate_from_dict(dic)
1598 return cls(**obj)
1599

/conda/envs/notebook/lib/python3.6/site-packages/datasets/features/features.py in generate_from_dict(obj)
1278 # Otherwise we have a dict or a dataclass
1279 if “_type” not in obj or isinstance(obj[“_type”], dict):
→ 1280 return {key: generate_from_dict(value) for key, value in obj.items()}
1281 class_type = globals()[obj.pop(“_type”)]
1282

/conda/envs/notebook/lib/python3.6/site-packages/datasets/features/features.py in (.0)
1278 # Otherwise we have a dict or a dataclass
1279 if “_type” not in obj or isinstance(obj[“_type”], dict):
→ 1280 return {key: generate_from_dict(value) for key, value in obj.items()}
1281 class_type = globals()[obj.pop(“_type”)]
1282

/conda/envs/notebook/lib/python3.6/site-packages/datasets/features/features.py in generate_from_dict(obj)
1282
1283 if class_type == Sequence:
→ 1284 return Sequence(feature=generate_from_dict(obj[“feature”]), length=obj[“length”])
1285
1286 field_names = {f.name for f in fields(class_type)}

KeyError: ‘length’

1 Like