notebook_launcher failing in both Colab and Kaggle

I am trying to train a model on TPUs using Colab or Kaggle. However, the script fails with the error below. It looked like a tqdm issue, so I tried every possible tqdm import (shown below) in both Colab and Kaggle with TPUs. I also tried running the Accelerate notebook example, and it worked perfectly fine. Does anybody know why I am having this problem?

from tqdm.notebook import tqdm

from tqdm.auto import tqdm

Exception in device=TPU:2: 'NoneType' object has no attribute 'flush'
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 224, in catch_format_error
    r = method(self, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 918, in __call__
    method()
  File "/opt/conda/lib/python3.7/site-packages/ipywidgets/widgets/widget.py", line 729, in _ipython_display_
    display(data, raw=True)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 318, in display
    publish_display_data(data=obj, metadata=metadata, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 122, in publish_display_data
    **kwargs
  File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 109, in publish
    self._flush_streams()
  File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 72, in _flush_streams
    sys.stdout.flush()
AttributeError: 'NoneType' object has no attribute 'flush'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/opt/conda/lib/python3.7/site-packages/accelerate/utils/launch.py", line 72, in __call__
    self.launcher(*args)
  File "/tmp/ipykernel_786/1470955466.py", line 117, in main
    en_raw_train_ds = load_dataset('text', data_files=os.path.join(args["data_dir"], "train.en"), split='train')
  File "/opt/conda/lib/python3.7/site-packages/datasets/load.py", line 1684, in load_dataset
    use_auth_token=use_auth_token,
  File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 705, in download_and_prepare
    dl_manager=dl_manager, verify_infos=verify_infos, **download_and_prepare_kwargs
  File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 771, in _download_and_prepare
    split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
  File "/opt/conda/lib/python3.7/site-packages/datasets/packaged_modules/text/text.py", line 40, in _split_generators
    data_files = dl_manager.download_and_extract(self.config.data_files)
  File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 431, in download_and_extract
    return self.extract(self.download(url_or_urls))
  File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 315, in download
    desc="Downloading data files",
  File "/opt/conda/lib/python3.7/site-packages/datasets/utils/py_utils.py", line 358, in map_nested
    for obj in logging.tqdm(iterable, disable=disable_tqdm, desc=desc)
  File "/opt/conda/lib/python3.7/site-packages/datasets/utils/logging.py", line 204, in __call__
    return tqdm_lib.tqdm(*args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/tqdm/notebook.py", line 243, in __init__
    display(self.container)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 320, in display
    format_dict, md_dict = format(obj, include=include, exclude=exclude)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 146, in format
    if self.ipython_display_formatter(obj):
  File "/opt/conda/lib/python3.7/site-packages/decorator.py", line 232, in fun
    return caller(func, *(extras + args), **kw)
AttributeError: 'NoneType' object has no attribute 'flush'
Exception in device=TPU:5: 'NoneType' object has no attribute 'flush'
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 224, in catch_format_error
    r = method(self, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 918, in __call__
    method()
  File "/opt/conda/lib/python3.7/site-packages/ipywidgets/widgets/widget.py", line 729, in _ipython_display_
    display(data, raw=True)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 318, in display
    publish_display_data(data=obj, metadata=metadata, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 122, in publish_display_data
    **kwargs
  File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 109, in publish
    self._flush_streams()
  File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 72, in _flush_streams
    sys.stdout.flush()
AttributeError: 'NoneType' object has no attribute 'flush'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/opt/conda/lib/python3.7/site-packages/accelerate/utils/launch.py", line 72, in __call__
    self.launcher(*args)
  File "/tmp/ipykernel_786/1470955466.py", line 117, in main
    en_raw_train_ds = load_dataset('text', data_files=os.path.join(args["data_dir"], "train.en"), split='train')
  File "/opt/conda/lib/python3.7/site-packages/datasets/load.py", line 1684, in load_dataset
    use_auth_token=use_auth_token,
  File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 705, in download_and_prepare
    dl_manager=dl_manager, verify_infos=verify_infos, **download_and_prepare_kwargs
  File "/opt/conda/lib/python3.7/site-packages/datasets/builder.py", line 771, in _download_and_prepare
    split_generators = self._split_generators(dl_manager, **split_generators_kwargs)
  File "/opt/conda/lib/python3.7/site-packages/datasets/packaged_modules/text/text.py", line 40, in _split_generators
    data_files = dl_manager.download_and_extract(self.config.data_files)
  File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 431, in download_and_extract
    return self.extract(self.download(url_or_urls))
  File "/opt/conda/lib/python3.7/site-packages/datasets/download/download_manager.py", line 315, in download
    desc="Downloading data files",
  File "/opt/conda/lib/python3.7/site-packages/datasets/utils/py_utils.py", line 358, in map_nested
    for obj in logging.tqdm(iterable, disable=disable_tqdm, desc=desc)
  File "/opt/conda/lib/python3.7/site-packages/datasets/utils/logging.py", line 204, in __call__
    return tqdm_lib.tqdm(*args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/tqdm/notebook.py", line 243, in __init__
    display(self.container)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/display.py", line 320, in display
    format_dict, md_dict = format(obj, include=include, exclude=exclude)
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/formatters.py", line 146, in format
    if self.ipython_display_formatter(obj):
  File "/opt/conda/lib/python3.7/site-packages/decorator.py", line 232, in fun
    return caller(func, *(extras + args), **kw)
AttributeError: 'NoneType' object has no attribute 'flush'
Exception in device=TPU:4: 'NoneType' object has no attribute 'flush'
---------------------------------------------------------------------------
ProcessExitedException                    Traceback (most recent call last)
/tmp/ipykernel_786/1800691571.py in <module>
      1 from accelerate import notebook_launcher
      2 
----> 3 notebook_launcher(main)

/opt/conda/lib/python3.7/site-packages/accelerate/launchers.py in notebook_launcher(function, args, num_processes, use_fp16, mixed_precision, use_port)
     74             launcher = PrepareForLaunch(function, distributed_type="TPU")
     75             print(f"Launching a training on {num_processes} TPU cores.")
---> 76             xmp.spawn(launcher, args=args, nprocs=num_processes, start_method="fork")
     77         else:
     78             # No need for a distributed launch otherwise as it's either CPU or one GPU.

/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py in spawn(fn, args, nprocs, join, daemon, start_method)
    393         join=join,
    394         daemon=daemon,
--> 395         start_method=start_method)
    396 
    397 

/opt/conda/lib/python3.7/site-packages/torch/multiprocessing/spawn.py in start_processes(fn, args, nprocs, join, daemon, start_method)
    196 
    197     # Loop on join until it returns True or raises an exception.
--> 198     while not context.join():
    199         pass
    200 

/opt/conda/lib/python3.7/site-packages/torch/multiprocessing/spawn.py in join(self, timeout)
    152                     error_index=error_index,
    153                     error_pid=failed_process.pid,
--> 154                     exit_code=exitcode
    155                 )
    156 

ProcessExitedException: process 2 terminated with exit code 17

We need much more code than you’ve provided here. How are you calling and using the progress bar? For how to use tqdm, I recommend looking at the example scripts that utilize it:

Fixed the problem. It was actually quite embarrassing: sys.stdout was set to None at the beginning of the function in my code. I am not sure how that code got there, though.

1 Like