OSError: [Errno 12] Cannot allocate memory

I am using `dataset.map()` on 160k items. It stopped at about 25.1k items, saying that there is an error with memory allocation. Is there a workaround for this without having to get more RAM? I’m wondering if there’s a way to call `save_to_disk()` for every 10k items. Is that possible?

from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D

# we need to define custom features
# Explicit schema for the encoded dataset: fixed-shape int64 arrays for the
# image and bounding boxes, int64 sequences for the token-level fields, and a
# class label built from `labels` (expected to be defined before this point).
# NOTE(review): int64 stores 8 bytes per image value; if the processor emits
# 0-255 pixel values, a narrower dtype would shrink the on-disk cache and the
# write buffers considerably -- confirm downstream expectations before changing.
features = Features({
    'image': Array3D(dtype="int64", shape=(3, 224, 224)),
    'input_ids': Sequence(feature=Value(dtype='int64')),
    'attention_mask': Sequence(feature=Value(dtype='int64')),
    'token_type_ids': Sequence(feature=Value(dtype='int64')),
    'bbox': Array2D(dtype="int64", shape=(512, 4)),
    'labels': ClassLabel(num_classes=len(labels), names=labels),
})

def preprocess_data(examples):
    """Encode one batch of examples for the model.

    Args:
        examples: Batch mapping with an ``'image_path'`` column (paths
            relative to the ``images/`` directory) and a ``'label'`` column.

    Returns:
        The processor output, with ``"image"`` converted to a NumPy array
        and a ``"labels"`` entry copied from ``examples["label"]``.

    Relies on ``Image`` (presumably ``PIL.Image`` -- confirm), ``np`` and
    ``processor`` being defined at module level.
    """
    # Take a batch of images; every image is forced to RGB.
    # NOTE(review): the call in front of `"images/" + path` was missing from
    # the snippet; `Image.open(...)` is reconstructed from the trailing
    # `.convert("RGB")` -- confirm.
    images = [
        Image.open("images/" + path).convert("RGB")
        for path in examples['image_path']
    ]
    # LayoutLMv2Processor: pad/truncate every example to the maximum length.
    encoded_inputs = processor(images, padding="max_length", truncation=True)
    encoded_inputs["image"] = np.array(encoded_inputs["image"])
    # Add labels (plain copy of the incoming label column).
    encoded_inputs["labels"] = list(examples["label"])

    return encoded_inputs

# Encode the whole dataset two examples at a time, dropping the original
# columns and casting the result to the explicit `features` schema.
# NOTE(review): the call target was missing from the snippet;
# `dataset.map(preprocess_data, ...)` is reconstructed from the keyword
# arguments -- confirm.
# NOTE(review): `Dataset.map` also accepts `writer_batch_size` (rows buffered
# in memory before each write to the cache file); lowering it is worth trying
# if this step runs out of memory.
encoded_dataset = dataset.map(
    preprocess_data,
    remove_columns=dataset.column_names,
    features=features,
    batched=True,
    batch_size=2,
)