The data we upload to Hugging Face contains some features with a variable-length time axis. For example, the feature for each sample can have shape (T, 768), where T varies from sample to sample. According to the official documentation, Hugging Face does support this type of 2D array when the first dimension is left as None, as I have done here:
features = datasets.Features(
{
"video_id": datasets.Value("string"),
"label": datasets.Value("string"),
"visual_feature": datasets.Array2D(shape=(None, 768), dtype='float64'), # (T, 768), T can vary
},
)
But when I upload it to Hugging Face, I get this error in the dataset viewer panel:
Error code: StreamingRowsError
Exception: ValueError
Message: shape=(768,) and ndims=2 don't match
Traceback: Traceback (most recent call last):
File "/src/services/worker/src/worker/utils.py", line 99, in get_rows_or_raise
return get_rows(
File "/src/libs/libcommon/src/libcommon/utils.py", line 272, in decorator
return func(*args, **kwargs)
File "/src/services/worker/src/worker/utils.py", line 77, in get_rows
rows_plus_one = list(itertools.islice(ds, rows_max_number + 1))
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/iterable_dataset.py", line 2270, in __iter__
for key, example in ex_iterable:
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/iterable_dataset.py", line 1856, in __iter__
for key, pa_table in self._iter_arrow():
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/iterable_dataset.py", line 1879, in _iter_arrow
for key, pa_table in self.ex_iterable._iter_arrow():
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/iterable_dataset.py", line 476, in _iter_arrow
for key, pa_table in iterator:
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/iterable_dataset.py", line 323, in _iter_arrow
for key, pa_table in self.generate_tables_fn(**gen_kwags):
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/packaged_modules/parquet/parquet.py", line 106, in _generate_tables
yield f"{file_idx}_{batch_idx}", self._cast_table(pa_table)
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/packaged_modules/parquet/parquet.py", line 73, in _cast_table
pa_table = table_cast(pa_table, self.info.features.arrow_schema)
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 1826, in arrow_schema
return pa.schema(self.type).with_metadata({"huggingface": json.dumps(hf_metadata)})
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 1815, in type
return get_nested_type(self)
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 1253, in get_nested_type
{key: get_nested_type(schema[key]) for key in schema}
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 1253, in <dictcomp>
{key: get_nested_type(schema[key]) for key in schema}
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 1277, in get_nested_type
return schema()
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 551, in __call__
pa_type = globals()[self.__class__.__name__ + "ExtensionType"](self.shape, self.dtype)
File "/src/services/worker/.venv/lib/python3.9/site-packages/datasets/features/features.py", line 665, in __init__
raise ValueError(f"shape={shape} and ndims={self.ndims} don't match")
ValueError: shape=(768,) and ndims=2 don't match
Is it expected behavior that an Array2D whose first dimension is None cannot be displayed, and that the whole dataset viewer panel stops functioning? Or am I doing something wrong in my dataset upload script?