File "examples/language-modeling/train.py", line 336, in <module>
main()
File "examples/language-modeling/train.py", line 300, in main
trainer.train(model_path=model_path)
File "/home/ksjae/.local/lib/python3.7/site-packages/transformers/trainer.py", line 741, in train
tr_loss += self.training_step(model, inputs)
File "/home/ksjae/.local/lib/python3.7/site-packages/transformers/trainer.py", line 1055, in training_step
self.scaler.scale(loss).backward()
File "/home/ksjae/.local/lib/python3.7/site-packages/torch/cuda/amp/grad_scaler.py", line 156, in scale
assert outputs.is_cuda
AssertionError
Oh, and using a block_size of 2048 causes this error:
result = self.forward(*input, **kwargs)
File "/home/ksjae/.local/lib/python3.7/site-packages/transformers/modeling_gpt2.py", line 594, in forward
position_embeds = self.wpe(position_ids)
File "/home/ksjae/.local/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/ksjae/.local/lib/python3.7/site-packages/torch/nn/modules/sparse.py", line 126, in forward
self.norm_type, self.scale_grad_by_freq, self.sparse)
File "/home/ksjae/.local/lib/python3.7/site-packages/torch/nn/functional.py", line 1814, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self