import torch
from transformers import LlamaConfig, LlamaForCausalLM

c = LlamaConfig.from_pretrained(<path to config.json>)
# Build the model on the meta device so no real storage is allocated yet.
with torch.device('meta'):
    m = LlamaForCausalLM(c)
w = torch.load(<path to weights.bin file>)
# assign=True swaps the meta tensors for the loaded ones instead of copying into them.
m.load_state_dict(w, assign=True)
m.to("cuda:0")  # throws the error below
The last line throws the following error:
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/goelayus/.local/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2692, in to
    return super().to(*args, **kwargs)
  File "/home/goelayus/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1152, in to
    return self._apply(convert)
  File "/home/goelayus/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 802, in _apply
    module._apply(fn)
  File "/home/goelayus/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 802, in _apply
    module._apply(fn)
  File "/home/goelayus/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 802, in _apply
    module._apply(fn)
  [Previous line repeated 2 more times]
  File "/home/goelayus/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 849, in _apply
    self._buffers[key] = fn(buf)
  File "/home/goelayus/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1150, in convert
    return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
NotImplementedError: Cannot copy out of meta tensor; no data!
Expected behavior
m.to("cuda:0") should succeed and move the model's parameters and buffers onto the GPU.
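
For context, the failing frame above (self._buffers[key] = fn(buf)) is converting a buffer, not a parameter. A plausible cause is that buffers absent from the checkpoint, such as the non-persistent rotary-embedding inv_freq buffers, which are never serialized, are still on the meta device after load_state_dict(w, assign=True) swaps in the loaded tensors. Continuing from the snippet above, here is a minimal workaround sketch under that assumption; materialize_meta_buffers is a hypothetical helper, it touches the private _buffers dict, and torch.empty_like allocates uninitialized storage, so any buffer whose values actually matter would still need to be recomputed afterwards:

def materialize_meta_buffers(module, device="cpu"):
    # Hypothetical helper: replace every buffer still on the meta device
    # with a freshly allocated (uninitialized) tensor of the same
    # shape/dtype, so that .to() has real data to move.
    for submodule in module.modules():
        for name, buf in submodule.named_buffers(recurse=False):
            if buf.is_meta:
                submodule._buffers[name] = torch.empty_like(buf, device=device)

materialize_meta_buffers(m)
m.to("cuda:0")  # no longer attempts to copy out of a meta tensor

Alternatively, LlamaForCausalLM.from_pretrained(..., low_cpu_mem_usage=True) performs this meta-device initialization internally and may sidestep the problem entirely.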