@lhoestq
Here you go. The traceback is a bit long because the iteration runs inside a tqdm loop over a PyTorch DataLoader; if it would make things easier, I can simplify the code and post a shorter reproduction.
Traceback (most recent call last):
File "C:\Users\flabelle002\Downloads\parquet_files\fineweb_curation.py", line 85, in <module>
for sample in tqdm(dl):
File "C:\Users\flabelle002\AppData\Roaming\Python\Python311\site-packages\tqdm\std.py", line 1182, in __iter__
for obj in iterable:
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\torch\utils\data\dataloader.py", line 630, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\torch\utils\data\dataloader.py", line 1325, in _next_data
return self._process_data(data)
^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\torch\utils\data\dataloader.py", line 1371, in _process_data
data.reraise()
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\torch\_utils.py", line 694, in reraise
raise exception
requests.exceptions.ConnectionError: Caught ConnectionError in DataLoader worker process 6.
Original Traceback (most recent call last):
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connectionpool.py", line 791, in urlopen
response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connectionpool.py", line 537, in _make_request
response = conn.getresponse()
^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connection.py", line 461, in getresponse
httplib_response = super().getresponse()
^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\http\client.py", line 1378, in getresponse
response.begin()
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\http\client.py", line 318, in begin
version, status, reason = self._read_status()
^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\http\client.py", line 287, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\adapters.py", line 486, in send
resp = conn.urlopen(
^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connectionpool.py", line 845, in urlopen
retries = retries.increment(
^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\util\retry.py", line 470, in increment
raise reraise(type(error), error, _stacktrace)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\util\util.py", line 38, in reraise
raise value.with_traceback(tb)
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connectionpool.py", line 791, in urlopen
response = self._make_request(
^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connectionpool.py", line 537, in _make_request
response = conn.getresponse()
^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\urllib3\connection.py", line 461, in getresponse
httplib_response = super().getresponse()
^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\http\client.py", line 1378, in getresponse
response.begin()
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\http\client.py", line 318, in begin
version, status, reason = self._read_status()
^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\http\client.py", line 287, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\torch\utils\data\_utils\worker.py", line 308, in _worker_loop
data = fetcher.fetch(index)
^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 41, in fetch
data = next(self.dataset_iter)
^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\iterable_dataset.py", line 1358, in __iter__
yield from self._iter_pytorch()
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\iterable_dataset.py", line 1293, in _iter_pytorch
for key, example in ex_iterable:
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\iterable_dataset.py", line 1039, in __iter__
yield from islice(self.ex_iterable, self.n)
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\iterable_dataset.py", line 982, in __iter__
for x in self.ex_iterable:
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\iterable_dataset.py", line 281, in __iter__
for key, pa_table in self.generate_tables_fn(**self.kwargs):
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\packaged_modules\parquet\parquet.py", line 79, in _generate_tables
for batch_idx, record_batch in enumerate(
File "pyarrow\_parquet.pyx", line 1323, in iter_batches
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\datasets\download\streaming_download_manager.py", line 333, in read_with_retries
out = read(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\fsspec\spec.py", line 1858, in read
out = self.cache._fetch(self.loc, self.loc + length)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\fsspec\caching.py", line 156, in _fetch
self.cache = self.fetcher(start, end) # new block replaces old
^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\huggingface_hub-0.19.0-py3.8.egg\huggingface_hub\hf_file_system.py", line 444, in _fetch_range
r = http_backoff("GET", url, headers=headers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\huggingface_hub-0.19.0-py3.8.egg\huggingface_hub\utils\_http.py", line 267, in http_backoff
response = session.request(method=method, url=url, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\sessions.py", line 589, in request
resp = self.send(prep, **send_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\sessions.py", line 725, in send
history = [resp for resp in gen]
^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\sessions.py", line 725, in <listcomp>
history = [resp for resp in gen]
^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\sessions.py", line 266, in resolve_redirects
resp = self.send(
^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\sessions.py", line 703, in send
r = adapter.send(request, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\huggingface_hub-0.19.0-py3.8.egg\huggingface_hub\utils\_http.py", line 63, in send
return super().send(request, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\flabelle002\AppData\Local\anaconda3\envs\SRL_Model\Lib\site-packages\requests\adapters.py", line 501, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: (ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), '(Request ID: 0e22e81e-79d7-44d9-9845-7515f81c82ac)')