Weird example of batching in the Dataset.map documentation

You can duplicate the label for each chunk:

def chunk_examples(examples, chunk_size=50):
    """Split every sentence in a batch into fixed-width chunks, repeating its label.

    Each entry of ``examples["sentence1"]`` is sliced into consecutive pieces
    of at most ``chunk_size`` items, and the matching entry of
    ``examples["label"]`` is emitted once per piece so the two output lists
    stay aligned.

    Args:
        examples: Mapping with parallel ``"sentence1"`` and ``"label"``
            sequences (one batch of rows). Sentences only need to support
            ``len()`` and slicing; presumably they are strings.
        chunk_size: Maximum length of each chunk. Defaults to 50, the width
            that was previously hard-coded.

    Returns:
        A dict ``{"chunk": [...], "label": [...]}`` whose two lists have the
        same length. An empty sentence produces no chunks, so it contributes
        no rows to the output.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    # A negative step would make range() empty and silently drop every row,
    # so reject bad sizes up front instead of returning an empty result.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    chunks = []
    labels = []
    for sentence, label in zip(examples["sentence1"], examples["label"]):
        pieces = [
            sentence[start:start + chunk_size]
            for start in range(0, len(sentence), chunk_size)
        ]
        chunks.extend(pieces)
        # Count the pieces actually produced rather than recomputing the range.
        labels.extend([label] * len(pieces))
    return {"chunk": chunks, "label": labels}
# Apply chunk_examples batch-wise, passing every existing column name as
# remove_columns.
chunked_dataset = dataset.map(
    chunk_examples,
    batched=True,
    remove_columns=dataset.column_names,
)