This code loads a model with an optional LoRA and generates 4 images. It works great, except that after 3 rounds of generation 32 GB of CPU RAM are in use and never released, which leads to a complete system lockup. I have an RTX 4080 with 16 GB of VRAM, and I run a function that tracks and releases VRAM (reclaim_mem(), shown at the bottom), but that seems unrelated.
ASK: Has anyone encountered this type of issue, where repeated generation with the same model ties up more and more system RAM, never releasing it, until the system locks up?
ASK: Any ideas on how to prevent this?
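For reference, the RAM growth can be confirmed by logging the process's resident set size between rounds. A minimal sketch of how I measure it (psutil is not part of the script below; it is only used for this check, and log_cpu_ram is a throwaway helper):

import os
import psutil

def log_cpu_ram(label=""):
    # print this process's resident set size (CPU RAM actually held), in GB
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024**3
    print(f"{label} CPU RAM (RSS): {rss_gb:.2f} GB")

Calling log_cpu_ram() before and after each call to generate_image() shows RSS climbing every round and never dropping back. The full script: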
from diffusers import StableDiffusionXLPipeline
import torch
from compel import Compel, ReturnedEmbeddingsType
import utilities as u
import time
image_list = []
lora_list = ["None","Ghostly", "Gross", "Graphic Novel", "Black and White", "Color Pencil", "Oil Painting", "Jean-Baptiste Monge"]
lora_dict = {
    "None": "None",
    "Ghostly": "models/stable-diffusion/loras/SDXLGhostStyle.safetensors",
    "Gross": "models/stable-diffusion/loras/fx-monsters-xl-meatsack.safetensors",
    "Graphic Novel": "models/stable-diffusion/loras/Graphic_Novel_Illustration-000007.safetensors",
    "Black and White": "models/stable-diffusion/loras/Storyboard_sketch.safetensors",
    "Color Pencil": "models/stable-diffusion/loras/DiTerlizziArtAIccp.safetensors",
    "Oil Painting": "models/stable-diffusion/loras/oil_painting.safetensors",
    "Jean-Baptiste Monge": "models/stable-diffusion/loras/Jean-Baptiste_Monge_Style.safetensors"}
lora_keyword_dict = {
    "None": "None",
    "Ghostly": "Ghostlystyle:1",
    "Gross": "<lora:fx-monsters-xl-meatsack:0.6> fx-monsters-xl-meatsack gross horrifying",
    "Graphic Novel": "In the style of a (graphic novel)++, (ink illustration)++ <lora:Graphic_Novel_Illustration-000007(1):0.8>.and(",
    "Black and White": " ((black and white)++ sketch)+, sparse, very quick (storyboard sketch)+ heavy black lines <lora:Storyboard_sketch:0.8> ",
    "Color Pencil": "<lora:DiTerlizziArtAI:0.8> whimsical, (sketchy illustration)++, (color pencil)++, detailed background DiTerlizziArtAI",
    "Oil Painting": "highly detailed,<lora:oil_painting:0.8> Heavy Brush Strokes (bichu), (oil painting)++",
    "Jean-Baptiste Monge": "<lora:Jean-Baptiste Monge Style:1>Jean-Baptiste Monge Style "}
lora = "None"
lora_keyword = ""  # was `= str` (assigned the type object); filled in by pick_lora()
# Enable TF32 for CUDA matmuls and set the path to the local base model
torch.backends.cuda.matmul.allow_tf32 = True
model_path = "/app/models/stable-diffusion/SDXLFaetastic_v20.safetensors"
def pick_lora(selection):  # `selection` was named `self`, but this is not a method
    global lora
    global lora_keyword
    print(selection)
    lora = lora_dict[selection]
    lora_keyword = lora_keyword_dict[selection]
    print(f"lora to be loaded : {lora}")
    print(f"lora keyword : {lora_keyword}")
def del_image_list():
    # `del image_list` on the global raises UnboundLocalError; clear it in place instead
    image_list.clear()
def generate_image(sd_input):
    u.reclaim_mem()
    user_lora = lora
    user_keyword = lora_keyword
    print(type(user_lora))
    print(f"LoRA : {user_lora}")
    start_time = time.time()
    # batch size
    num_img = 4
    # load the SD model in float16; from_single_file is critical for loading a local file
    pipeline = StableDiffusionXLPipeline.from_single_file(
        model_path,
        custom_pipeline="lpw_stable_diffusion",
        torch_dtype=torch.float16,
        variant="fp16").to("cuda")
    if user_lora != "None":
        pipeline.unet.load_attn_procs(user_lora)
        print("LoRA loaded")
    else:
        print("No LoRA")
    # enable VAE slicing to prevent out-of-memory errors when generating batches
    pipeline.enable_vae_slicing()
    # Compel allows prompts longer than 77 tokens AND weighting of specific tokens
    compel = Compel(tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2],
                    text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2],
                    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
                    requires_pooled=[False, True],
                    truncate_long_prompts=False)
    # prepend the LoRA trigger keywords to the user's prompt
    prompt = sd_input
    if user_lora != "None":
        prompt = f"{user_keyword} {prompt}"  # removed a stray trailing ")"
        print(prompt)
    else:
        print("No LoRA keywords")
        print(prompt)
    # conditioning holds the per-token embeddings, pooled the pooled text embeddings;
    # see the Compel SDXL demo: https://github.com/damian0815/compel/blob/main/compel-demo-sdxl.ipynb
    negative_prompt = "watermark, text, fastnegative2, blurry, ugly, low quality, worst quality, 3d"
    conditioning, pooled = compel([prompt, negative_prompt])
    print(conditioning.shape, pooled.shape)
    # generate the images one at a time
    # image = pipe(prompt=prompt, num_inference_steps=50).images[0]
    for x in range(num_img):
        image = pipeline(prompt_embeds=conditioning[0:1], pooled_prompt_embeds=pooled[0:1],
                         negative_prompt_embeds=conditioning[1:2], negative_pooled_prompt_embeds=pooled[1:2],
                         num_inference_steps=30, width=1024, height=1024).images[0]
        image_name = u.make_image_name()
        image.save(image_name)
        image_list.append(image_name)
    pipeline.unload_lora_weights()
    del pipeline
    del compel
    del image
    u.reclaim_mem()
    print(image_list)
    stop_time = time.time()
    run_time = stop_time - start_time
    print(f"Time to generate : {run_time}")
    return image_list
And here is the function that clears VRAM (called above as u.reclaim_mem(), from the utilities module):
import gc
import time
import torch

def reclaim_mem():
    # snapshot VRAM usage before cleanup
    allocated_memory = torch.cuda.memory_allocated()
    cached_memory = torch.cuda.memory_reserved()
    print(f"Memory Allocated: {allocated_memory / 1024**2:.2f} MB")
    print(f"Memory Cached: {cached_memory / 1024**2:.2f} MB")
    torch.cuda.ipc_collect()
    gc.collect()
    torch.cuda.empty_cache()
    time.sleep(0.01)
    # re-query the counters so the "after" numbers reflect the cleanup
    # (the original version reprinted the pre-cleanup strings here)
    print(f"Memory Allocated after cleanup: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
    print(f"Memory Cached after cleanup: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")