I am trying to train a model on a TPU using Colab or Kaggle. However, the script fails with the error below. It looks like a tqdm issue, so I tried all the possible tqdm imports in both Colab and Kaggle with TPUs. I also ran the Accelerate notebook example, and it worked perfectly fine. Does anybody know why I am having this problem?
from tqdm.notebook import tqdm
from tqdm.auto import tqdm
Exception in device=TPU:2: 'NoneType' object has no attribute 'flush'
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 224, in catch_format_error
r = method(self, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 918, in __call__
method()
File "/opt/conda/lib/python3.7/site-packages/ipywidgets/widgets/widget.py", line 729, in _ipython_display_
display(data, raw=True)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 318, in display
publish_display_data(data=obj, metadata=metadata, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 122, in publish_display_data
**kwargs
File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 109, in publish
self._flush_streams()
File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 72, in _flush_streams
sys.stdout.flush()
AttributeError: 'NoneType' object has no attribute 'flush'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
_start_fn(index, pf_cfg, fn, args)
File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
fn(gindex, *args)
File "/opt/conda/lib/python3.7/site-packages/accelerate/utils/launch.py", line 72, in __call__
self.launcher(*args)
File "/tmp/ipykernel_786/1470955466.py", line 117, in main
en_raw_train_ds = load_dataset('text', data_files=os.path.join(args["data_dir"], "train.en"), split='train')
File "/opt/conda/lib/python3.7/site-packages/datasets/load.py", line 1684, in load_dataset
use_auth_token=use_auth_token,
File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 705, in download_and_prepare
dl_manager=dl_manager, verify_infos=verify_infos, **download_and_prepare_kwargs
File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 771, in _download_and_prepare
split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
File "/opt/conda/lib/python3.7/site-packages/datasets/packaged_modules/text/text.py", line 40, in _split_generators
data_files = dl_manager.download_and_extract(self.config.data_files)
File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 431, in download_and_extract
return self.extract(self.download(url_or_urls))
File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 315, in download
desc="Downloading data files",
File "/opt/conda/lib/python3.7/site-packages/datasets/utils/py_utils.py", line 358, in map_nested
for obj in logging.tqdm(iterable, disable=disable_tqdm, desc=desc)
File "/opt/conda/lib/python3.7/site-packages/datasets/utils/logging.py", line 204, in __call__
return tqdm_lib.tqdm(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/tqdm/notebook.py", line 243, in __init__
display(self.container)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 320, in display
format_dict, md_dict = format(obj, include=include, exclude=exclude)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 146, in format
if self.ipython_display_formatter(obj):
File "/opt/conda/lib/python3.7/site-packages/decorator.py", line 232, in fun
return caller(func, *(extras + args), **kw)
AttributeError: 'NoneType' object has no attribute 'flush'
Exception in device=TPU:5: 'NoneType' object has no attribute 'flush'
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 224, in catch_format_error
r = method(self, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 918, in __call__
method()
File "/opt/conda/lib/python3.7/site-packages/ipywidgets/widgets/widget.py", line 729, in _ipython_display_
display(data, raw=True)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 318, in display
publish_display_data(data=obj, metadata=metadata, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 122, in publish_display_data
**kwargs
File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 109, in publish
self._flush_streams()
File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 72, in _flush_streams
sys.stdout.flush()
AttributeError: 'NoneType' object has no attribute 'flush'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
_start_fn(index, pf_cfg, fn, args)
File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
fn(gindex, *args)
File "/opt/conda/lib/python3.7/site-packages/accelerate/utils/launch.py", line 72, in __call__
self.launcher(*args)
File "/tmp/ipykernel_786/1470955466.py", line 117, in main
en_raw_train_ds = load_dataset('text', data_files=os.path.join(args["data_dir"], "train.en"), split='train')
File "/opt/conda/lib/python3.7/site-packages/datasets/load.py", line 1684, in load_dataset
use_auth_token=use_auth_token,
File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 705, in download_and_prepare
dl_manager=dl_manager, verify_infos=verify_infos, **download_and_prepare_kwargs
File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 771, in _download_and_prepare
split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
File "/opt/conda/lib/python3.7/site-packages/datasets/packaged_modules/text/text.py", line 40, in _split_generators
data_files = dl_manager.download_and_extract(self.config.data_files)
File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 431, in download_and_extract
return self.extract(self.download(url_or_urls))
File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 315, in download
desc="Downloading data files",
File "/opt/conda/lib/python3.7/site-packages/datasets/utils/py_utils.py", line 358, in map_nested
for obj in logging.tqdm(iterable, disable=disable_tqdm, desc=desc)
File "/opt/conda/lib/python3.7/site-packages/datasets/utils/logging.py", line 204, in __call__
return tqdm_lib.tqdm(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/tqdm/notebook.py", line 243, in __init__
display(self.container)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 320, in display
format_dict, md_dict = format(obj, include=include, exclude=exclude)
File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 146, in format
if self.ipython_display_formatter(obj):
File "/opt/conda/lib/python3.7/site-packages/decorator.py", line 232, in fun
return caller(func, *(extras + args), **kw)
AttributeError: 'NoneType' object has no attribute 'flush'
Exception in device=TPU:4: 'NoneType' object has no attribute 'flush'
---------------------------------------------------------------------------
ProcessExitedException Traceback (most recent call last)
/tmp/ipykernel_786/1800691571.py in <module>
1 from accelerate import notebook_launcher
2
----> 3 notebook_launcher(main)
/opt/conda/lib/python3.7/site-packages/accelerate/launchers.py in notebook_launcher(function, args, num_processes, use_fp16, mixed_precision, use_port)
74 launcher = PrepareForLaunch(function, distributed_type="TPU")
75 print(f"Launching a training on {num_processes} TPU cores.")
---> 76 xmp.spawn(launcher, args=args, nprocs=num_processes, start_method="fork")
77 else:
78 # No need for a distributed launch otherwise as it's either CPU or one GPU.
/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py in spawn(fn, args, nprocs, join, daemon, start_method)
393 join=join,
394 daemon=daemon,
--> 395 start_method=start_method)
396
397
/opt/conda/lib/python3.7/site-packages/torch/multiprocessing/spawn.py in start_processes(fn, args, nprocs, join, daemon, start_method)
196
197 # Loop on join until it returns True or raises an exception.
--> 198 while not context.join():
199 pass
200
/opt/conda/lib/python3.7/site-packages/torch/multiprocessing/spawn.py in join(self, timeout)
152 error_index=error_index,
153 error_pid=failed_process.pid,
--> 154 exit_code=exitcode
155 )
156
ProcessExitedException: process 2 terminated with exit code 17