import torch
from transformers import AutoConfig, AutoTokenizer, BloomForCausalLM, AutoModelForCausalLM
import time
import psutil
import gc
def get_host_memory():
    """Print the current process's resident set size (RSS), in GiB."""
    # rss is reported in bytes; convert to GiB and round for readability.
    memory_allocated = round(psutil.Process().memory_info().rss / 1024**3, 3)
    # Fixed mojibake quotes (âcpuâ / âGBâ) that made this line a syntax error.
    print("cpu", " memory used total: ", memory_allocated, "GB")
def load_and_run():
    """Build bloom-560m from its config (randomly initialized weights, no
    checkpoint download), report load time and host memory, then delete it.

    NOTE(review): `del` only removes the local references; the allocator may
    not return the pages to the OS immediately, so RSS can stay elevated.
    """
    s_ = time.time()
    # Fixed mojibake quotes around the model id (was âbigscience/bloom-560mâ).
    config = AutoConfig.from_pretrained("bigscience/bloom-560m")
    model = AutoModelForCausalLM.from_config(config)
    e_ = time.time()
    print("model loading time: %f " % (e_ - s_))
    get_host_memory()
    del model
    del config
load_and_run()
gc.collect()
# After deleting the model, list any tensors the GC still tracks; an empty
# listing means no Python-side references are keeping the weights alive.
for obj in gc.get_objects():
    try:
        # Fixed mojibake quotes around the attribute name (was âdataâ).
        if torch.is_tensor(obj) or (hasattr(obj, "data") and torch.is_tensor(obj.data)):
            print(type(obj), obj.size())
    except Exception:
        # Some GC-tracked objects raise on attribute access or size(); skip
        # them. Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate.
        pass
get_host_memory()
Expected behavior

Currently observed:
after the model is loaded:
cpu memory used total: 3.327 GB
after the model is deleted:
cpu memory used total: 3.364 GB

Expected: host memory should be freed (RSS should drop) after the model is deleted.