This error was caused by v_array and the use of py.array().
However, even after removing that, I still get an error:
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Users\<username>\.vscode\extensions\ms-python.python-2023.4.1\pythonFiles\lib\python\debugpy\adapter/../..\debugpy\launcher/../..\debugpy\__main__.py", line 39, in <module>
    cli.main()
  File "c:\Users\<username>\.vscode\extensions\ms-python.python-2023.4.1\pythonFiles\lib\python\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 430, in main
    run()
  File "c:\Users\<username>\.vscode\extensions\ms-python.python-2023.4.1\pythonFiles\lib\python\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 284, in run_file
    runpy.run_path(target, run_name="__main__")
  File "c:\Users\<username>\.vscode\extensions\ms-python.python-2023.4.1\pythonFiles\lib\python\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 321, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "c:\Users\<username>\.vscode\extensions\ms-python.python-2023.4.1\pythonFiles\lib\python\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 135, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "c:\Users\<username>\.vscode\extensions\ms-python.python-2023.4.1\pythonFiles\lib\python\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 124, in _run_code
    exec(code, run_globals)
  File "c:\Users\<username>\Documents\Codes_Python\pretraining_wav2vec\Code_Lightning\wav2vec_retraining\src\data\hdf5_to_validation.py", line 97, in <module>
    dict_valid = datasets.Dataset.from_dict({'input_values': v_array_values})
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\site-packages\datasets\arrow_dataset.py", line 859, in from_dict
    pa_table = InMemoryTable.from_pydict(mapping=mapping)
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\site-packages\datasets\table.py", line 750, in from_pydict
    return cls(pa.Table.from_pydict(*args, **kwargs))
  File "pyarrow\table.pxi", line 3625, in pyarrow.lib.Table.from_pydict
  File "pyarrow\table.pxi", line 5150, in pyarrow.lib._from_pydict
  File "pyarrow\array.pxi", line 342, in pyarrow.lib.asarray
  File "pyarrow\array.pxi", line 230, in pyarrow.lib.array
  File "pyarrow\array.pxi", line 110, in pyarrow.lib._handle_arrow_array_protocol
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\site-packages\datasets\arrow_writer.py", line 180, in __arrow_array__
    out = list_of_np_array_to_pyarrow_listarray(data)
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\site-packages\datasets\features\features.py", line 1330, in list_of_np_array_to_pyarrow_listarray
    return list_of_pa_arrays_to_pyarrow_listarray(
  File "C:\Users\<username>\miniforge3\envs\wav2vec_pretraining\lib\site-packages\datasets\features\features.py", line 1322, in list_of_pa_arrays_to_pyarrow_listarray
    offsets = pa.array(offsets, type=pa.int32())
  File "pyarrow\array.pxi", line 312, in pyarrow.lib.array
  File "pyarrow\array.pxi", line 83, in pyarrow.lib._ndarray_to_array
OverflowError: Python int too large to convert to C long
Do you have any idea what could cause this error? I am on Windows, but I get an analogous error on a Linux machine too.
Thanks!