My code is as following, is there something wrong? if no, why so slowly(or freeze) when running on Kaggle TPU device…
!pip install cloud-tpu-client==0.10 torch==1.9.0 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.9-cp37-cp37m-linux_x86_64.whl
!pip install accelerate
from accelerate import Accelerator
import torch_xla.core.xla_model as xm
def training_function():
dataloader = get_dataloader() # NLP task, all batch have padding into same length.
model = get_model()
optimizer = get_optimizer(model)
accelerator = Accelerator()
model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
for _ in range(20):
for batch in dataloader:
accelerator.backward(model(**batch).loss)
optimizer.step()
optimizer.zero_grad()
xm.mark_step() # <-is need this?
if "__main__" == __name__:
from accelerate import notebook_launcher
notebook_launcher(training_function) # Kaggle, TPU