Running SDXL diffusers in a Python container on Ubuntu 22.04: system RAM not being released

This code loads a model with an optional LoRA and generates 4 images. It works great, except that after 3 rounds of generation 32 GB of CPU RAM are in use and never released, which leads to a complete system lockup. I have a 4080 with 16 GB VRAM and I run a function that tracks VRAM and releases it (reclaim_mem(), shown at the bottom), but that seems unrelated.
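
To quantify the growth between rounds I would log the process RSS with something like this (a sketch; it assumes psutil is installed and is not part of the script below):

import os
import psutil

def log_rss(tag):
    # resident set size of this process, in GB
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024**3
    print(f"{tag}: RSS = {rss_gb:.2f} GB")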

ASK: Has anyone run into this kind of issue, where repeated generation with the same model ties up more and more system RAM, never releasing it, until the whole system locks up?
ASK: Any ideas on how to prevent this?

from diffusers import StableDiffusionXLPipeline
import torch
from compel import Compel, ReturnedEmbeddingsType
import utilities as u
import time


image_list = []
lora_list = ["None","Ghostly", "Gross", "Graphic Novel", "Black and White", "Color Pencil", "Oil Painting", "Jean-Baptiste Monge"]
lora_dict = {
    "None" : "None",
    "Ghostly" : "models/stable-diffusion/loras/SDXLGhostStyle.safetensors",
    "Gross" : "models/stable-diffusion/loras/fx-monsters-xl-meatsack.safetensors",
    "Graphic Novel" : "models/stable-diffusion/loras/Graphic_Novel_Illustration-000007.safetensors",
    "Black and White" : "models/stable-diffusion/loras/Storyboard_sketch.safetensors",
    "Color Pencil" : "models/stable-diffusion/loras/DiTerlizziArtAIccp.safetensors",
    "Oil Painting" : "models/stable-diffusion/loras/oil_painting.safetensors",
    "Jean-Baptiste Monge" : "models/stable-diffusion/loras/Jean-Baptiste_Monge_Style.safetensors"}
lora_keyword_dict = {
    "None" : "None",
    "Ghostly" : "Ghostlystyle:1",
    "Gross" : "<lora:fx-monsters-xl-meatsack:0.6> fx-monsters-xl-meatsack gross horrifying",
    "Graphic Novel" : "In the style of a (graphic novel)++, (ink illustration)++ <lora:Graphic_Novel_Illustration-000007(1):0.8>.and(",
    "Black and White" : " ((black and white)++ sketch)+, sparse, very quick (storyboard sketch)+ heavy black lines <lora:Storyboard_sketch:0.8> ",
    "Color Pencil" : "<lora:DiTerlizziArtAI:0.8> whimsical, (sketchy illustration)++, (color pencil)++, detailed background DiTerlizziArtAI",
    "Oil Painting" :  "highly detailed,<lora:oil_painting:0.8> Heavy Brush Strokes (bichu), (oil painting)++",
    "Jean-Baptiste Monge" : "<lora:Jean-Baptiste Monge Style:1>Jean-Baptiste Monge Style " }
lora = "None"
lora_keyword = "None"  # overwritten by pick_lora()

# Enable TF32 matmuls and point at the local checkpoint
torch.backends.cuda.matmul.allow_tf32 = True
model_path = "/app/models/stable-diffusion/SDXLFaetastic_v20.safetensors"


def pick_lora(choice):
    global lora
    global lora_keyword
    print(choice)
    lora = lora_dict[choice]
    lora_keyword = lora_keyword_dict[choice]
    print(f"lora to be loaded : {lora}")
    print(f"lora keyword : {lora_keyword}")
    



def del_image_list():
    # "del image_list" inside a function would raise UnboundLocalError,
    # because del makes the name local; clear the module-level list in place
    image_list.clear()

def generate_image(sd_input):
    u.reclaim_mem()
    user_lora = lora
    user_keyword = lora_keyword
    print(f"LoRA : {user_lora}")
    start_time = time.time()
    # batch size
    num_img = 4
    # Build the pipeline in float16; from_single_file is critical for loading a local file.
    # Note that this reloads the full checkpoint from disk on every call.
    pipeline = StableDiffusionXLPipeline.from_single_file(
        model_path,
        custom_pipeline="lpw_stable_diffusion",
        torch_dtype=torch.float16,
        variant="fp16",
    ).to("cuda")
    if user_lora != "None":
        # pipeline-level load; pairs with unload_lora_weights() at the end
        pipeline.load_lora_weights(user_lora)
        print("LoRA loaded")
    else:
        print("No LoRA")

    # enable VAE slicing to prevent out-of-memory errors when generating batches
    pipeline.enable_vae_slicing()

    # Compel allows prompts longer than 77 tokens and per-token weighting
    compel = Compel(
        tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2],
        text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2],
        returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
        requires_pooled=[False, True],
        truncate_long_prompts=False,
    )
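    # weighting syntax: "word++" or "(phrase)++" upweights tokens; the
    # lora_keyword_dict entries above rely on this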

        
    # prepend the LoRA trigger keywords to the caller's prompt
    prompt = sd_input
    if user_lora != "None":
        prompt = f"{user_keyword} {prompt}"
        print(prompt)
    else:
        print("No LoRA keywords")
        print(prompt)

    # compel returns per-token embeddings (conditioning) plus the pooled embeddings
    # SDXL also expects; see https://github.com/damian0815/compel/blob/main/compel-demo-sdxl.ipynb
    negative_prompt = "watermark, text, fastnegative2, blurry, ugly, low quality, worst quality, 3d"
    conditioning, pooled = compel([prompt, negative_prompt])
    print(conditioning.shape, pooled.shape)
    
    

    # generate num_img images one at a time from the same embeddings
    for x in range(num_img):
        image = pipeline(
            prompt_embeds=conditioning[0:1],
            pooled_prompt_embeds=pooled[0:1],
            negative_prompt_embeds=conditioning[1:2],
            negative_pooled_prompt_embeds=pooled[1:2],
            num_inference_steps=30,
            width=1024,
            height=1024,
        ).images[0]
        image_name = u.make_image_name()
        image.save(image_name)
        image_list.append(image_name)
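    # unload the LoRA and drop local references so reclaim_mem() can free VRAM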
    pipeline.unload_lora_weights()
    del pipeline
    del compel
    del image
    u.reclaim_mem()
    print(image_list)
    stop_time = time.time()
    run_time = stop_time - start_time
    print(f"Time to generate : {run_time:.1f} s")

    return image_list
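
For reference, the pattern that triggers the leak is just repeated calls (the prompt here is a placeholder):

pick_lora("Ghostly")
for round_num in range(3):
    generate_image("example prompt")
    # VRAM reported by reclaim_mem() looks fine each round; system RAM keeps climbing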

The function that tracks and clears VRAM (u.reclaim_mem(), imported from utilities.py above):

import gc
import time

import torch


def reclaim_mem():
    # report VRAM usage before cleanup
    allocated_memory = torch.cuda.memory_allocated()
    cached_memory = torch.cuda.memory_reserved()
    print(f"Memory Allocated: {allocated_memory / 1024**2:.2f} MB")
    print(f"Memory Cached: {cached_memory / 1024**2:.2f} MB")
    # collect dead Python objects, then release cached CUDA blocks
    torch.cuda.ipc_collect()
    gc.collect()
    torch.cuda.empty_cache()
    time.sleep(0.01)
    # measure again so the "after" numbers reflect the cleanup
    allocated_memory = torch.cuda.memory_allocated()
    cached_memory = torch.cuda.memory_reserved()
    print(f"Memory Allocated after del: {allocated_memory / 1024**2:.2f} MB")
    print(f"Memory Cached after del: {cached_memory / 1024**2:.2f} MB")