### Describe the bug
Loading a previously downloaded & saved dataset as descr…ibed in the HuggingFace course:
issues_dataset = load_dataset("json", data_files="issues/datasets-issues.jsonl", split="train")
Gives this error:
datasets.builder.DatasetGenerationError: An error occurred while generating the dataset
A work-around I found was to use streaming.
### Steps to reproduce the bug
Reproduce by executing the code provided:
https://huggingface.co/course/chapter5/5?fw=pt
From the heading:
'let’s create a function that can download all the issues from a GitHub repository'
### Expected behavior
No error
### Environment info
Datasets version 2.8.0. Note that version 2.6.1 gives the same error (related to null timestamp).
**[EDIT]**
This is the complete error trace confirming the issue is related to the timestamp (`Couldn't cast array of type timestamp[s] to null`)
```
Using custom data configuration default-950028611d2860c8
Downloading and preparing dataset json/default to [...]/.cache/huggingface/datasets/json/default-950028611d2860c8/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...
Downloading data files: 100%|██████████| 1/1 [00:00<?, ?it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 500.63it/s]
Generating train split: 2619 examples [00:00, 7155.72 examples/s]Traceback (most recent call last):
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\builder.py", line 1831, in _prepare_split_single
writer.write_table(table)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\arrow_writer.py", line 567, in write_table
pa_table = table_cast(pa_table, self._schema)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 2282, in table_cast
return cast_table_to_schema(table, schema)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 2241, in cast_table_to_schema
arrays = [cast_array_to_feature(table[name], feature) for name, feature in features.items()]
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 2241, in <listcomp>
arrays = [cast_array_to_feature(table[name], feature) for name, feature in features.items()]
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 1807, in wrapper
return pa.chunked_array([func(chunk, *args, **kwargs) for chunk in array.chunks])
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 1807, in <listcomp>
return pa.chunked_array([func(chunk, *args, **kwargs) for chunk in array.chunks])
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 2035, in cast_array_to_feature
arrays = [_c(array.field(name), subfeature) for name, subfeature in feature.items()]
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 2035, in <listcomp>
arrays = [_c(array.field(name), subfeature) for name, subfeature in feature.items()]
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 1809, in wrapper
return func(array, *args, **kwargs)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 2101, in cast_array_to_feature
return array_cast(array, feature(), allow_number_to_str=allow_number_to_str)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 1809, in wrapper
return func(array, *args, **kwargs)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\table.py", line 1990, in array_cast
raise TypeError(f"Couldn't cast array of type {array.type} to {pa_type}")
TypeError: Couldn't cast array of type timestamp[s] to null
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm 2022.1.3\plugins\python\helpers\pydev\pydevconsole.py", line 364, in runcode
coro = func()
File "<input>", line 1, in <module>
File "C:\Program Files\JetBrains\PyCharm 2022.1.3\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 198, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2022.1.3\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "[...]\PycharmProjects\TransformersTesting\dataset_issues.py", line 20, in <module>
issues_dataset = load_dataset("json", data_files="issues/datasets-issues.jsonl", split="train")
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\load.py", line 1757, in load_dataset
builder_instance.download_and_prepare(
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\builder.py", line 860, in download_and_prepare
self._download_and_prepare(
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\builder.py", line 953, in _download_and_prepare
self._prepare_split(split_generator, **prepare_split_kwargs)
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\builder.py", line 1706, in _prepare_split
for job_id, done, content in self._prepare_split_single(
File "[...]\miniconda3\envs\HuggingFace\lib\site-packages\datasets\builder.py", line 1849, in _prepare_split_single
raise DatasetGenerationError("An error occurred while generating the dataset") from e
datasets.builder.DatasetGenerationError: An error occurred while generating the dataset
Generating train split: 2619 examples [00:19, 7155.72 examples/s]
```