Program not working on GPU but works on CPU

Hello

I am trying to run this program on the GPU, but it generates a black image. On the CPU it produces the expected output.

The program is as follows:

import logging
from diffusers import AutoPipelineForText2Image, AutoencoderKL
import torch
import numpy as np
import random
import os
from PIL import Image

# =========================
# STEP 0: Logging Setup
# =========================
log_file = "generation_log.txt"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)

logging.info("Initializing...")

# =========================
# STEP 1: Environment Setup
# =========================

torch.cuda.empty_cache()
torch.cuda.ipc_collect()

seed = random.randint(0, 9999999)
torch.manual_seed(seed)
np.random.seed(seed)
logging.info(f"Using seed: {seed}")

# ===============================
# STEP 2: Model and LoRA Setup
# ===============================
logging.info("Loading base model and LoRA weights...")

model_dir = "D:\\Ganu\\AIImage\\huggingface\\kohya_ss\\kohya_ss\\outputs"
lora_weights_path = os.path.join(model_dir, "model")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Optional: Custom VAE (uncomment if needed)
# vae = AutoencoderKL.from_pretrained(
#     "madebyollin/sdxl-vae-fp16-fix",
#     torch_dtype=torch.float16
# ).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16"
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

# If using VAE:
# pipeline.vae = vae

pipeline.enable_attention_slicing()
pipeline.enable_vae_slicing()

try:
    pipeline.load_lora_weights(lora_weights_path, weight_name="last.safetensors")
    logging.info("LoRA weights loaded successfully.")
except ValueError as e:
    logging.error("Invalid LoRA checkpoint. Check the format or compatibility.")
    raise e

# =========================
# STEP 3: Prompt Inference
# =========================
text_prompt = (
    "A wide, breathtaking landscape with all real vibrant nature-themed background, lush forests, mountains, and a Doctor standing prominently in the foreground"
)

negative_prompt = (
    "text, letters, words, signage, logos, labels, writing, messy background, busy layout, clutter, double faces, abstract shapes, UI panels with words, overlapping elements, header, footer, top bar, navigation bar, bottom menu, toolbar, top text, website layout, browser frame, button row, page border, UI bar"
)

logging.info(f"Running inference with prompt: {text_prompt}")

try:
    result = pipeline(
        prompt=text_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=30
    )
    generated_image = result.images[0]
    output_path = f"generated_image_{seed}.png"
    generated_image.save(output_path)
    logging.info(f"Image saved to: {output_path}")
    generated_image.show()
except Exception as e:
    logging.error(f"Error during image generation: {e}")
    raise

The environment details are as follows:

C:\Users\ADMIN>nvidia-smi
Wed May 14 15:17:51 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.36                 Driver Version: 566.36         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA GeForce GTX 1650      WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   62C    P0             32W /   50W |    3833MiB /   4096MiB |    100%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|    0   N/A  N/A      3008    C+G   ...n\NVIDIA app\CEF\NVIDIA Overlay.exe      N/A      |
|    0   N/A  N/A      6232    C+G   ...b3d8bbwe\Microsoft.Media.Player.exe      N/A      |
|    0   N/A  N/A     10308    C+G   ...oogle\Chrome\Application\chrome.exe      N/A      |
|    0   N/A  N/A     15020    C+G   ...n\NVIDIA app\CEF\NVIDIA Overlay.exe      N/A      |
|    0   N/A  N/A     16140    C+G   C:\Windows\explorer.exe                     N/A      |
|    0   N/A  N/A     17036    C+G   ...siveControlPanel\SystemSettings.exe      N/A      |
|    0   N/A  N/A     17088    C+G   ...oogle\Chrome\Application\chrome.exe      N/A      |
|    0   N/A  N/A     17732    C+G   ...CBS_cw5n1h2txyewy\TextInputHost.exe      N/A      |
|    0   N/A  N/A     19012    C+G   ...on\135.0.3179.98\msedgewebview2.exe      N/A      |
|    0   N/A  N/A     19720    C+G   ...t.LockApp_cw5n1h2txyewy\LockApp.exe      N/A      |
|    0   N/A  N/A     20816    C+G   ...2txyewy\StartMenuExperienceHost.exe      N/A      |
|    0   N/A  N/A     20948    C+G   ....Search_cw5n1h2txyewy\SearchApp.exe      N/A      |
|    0   N/A  N/A     21008    C+G   ....Search_cw5n1h2txyewy\SearchApp.exe      N/A      |
|    0   N/A  N/A     22108    C+G   ...5n1h2txyewy\ShellExperienceHost.exe      N/A      |
|    0   N/A  N/A     23296      C   ...gface\kohya_ss\Python310\python.exe      N/A      |
|    0   N/A  N/A     24012    C+G   ...137.0_x64__dt26b99r8h8gj\RtkUWP.exe      N/A      |
+-----------------------------------------------------------------------------------------+
(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>python
Python 3.10.10 (tags/v3.10.10:aad5f6a, Feb  7 2023, 17:20:36) [MSC v.1929 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> print(f"CUDA Available: {torch.cuda.is_available()}")
CUDA Available: True
>>> print(f"GPU Name: {torch.cuda.get_device_name(0)}")
GPU Name: NVIDIA GeForce GTX 1650
>>> print(f"PyTorch Version: {torch.__version__}")
PyTorch Version: 2.7.0+cu118
>>> print(f"CUDA Version: {torch.version.cuda}")
CUDA Version: 11.8

Any pointers?

P.S.:

  1. The GPU version was working before, but I cleaned my computer, removing several apps, possibly some DLLs, and programs like Microsoft Visual Studio.

  2. I tried consulting ChatGPT and Grok; their suggestions got the CPU version working, but not the GPU.

  3. The logs don't show any errors.


Diffusion models tend to run into various problems with float16 (including failures that complete without any error). As your commented-out code already hints, using the optional fp16-fix VAE will likely make it work properly.

# Custom VAE (fp16-fix)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
        device_map="auto"
    )#.to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

# If using VAE:
pipeline.vae = vae

Hello

I am getting the following error after modifying the code:


(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>python John-Training-12thFeb2025-original.py
2025-05-15 09:56:28,925 - INFO - Initializing...
2025-05-15 09:56:28,951 - INFO - Using seed: 1421589
2025-05-15 09:56:28,951 - INFO - Loading base model and LoRA weights...
2025-05-15 09:56:29,970 - ERROR - Failed to load model pipeline: auto not supported. Supported strategies are: balanced
Traceback (most recent call last):
  File "D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo\John-Training-12thFeb2025-original.py", line 52, in <module>
    pipeline = AutoPipelineForText2Image.from_pretrained(
  File "D:\Ganu\AIImage\venv\lib\site-packages\huggingface_hub\utils\_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
  File "D:\Ganu\AIImage\venv\lib\site-packages\diffusers\pipelines\auto_pipeline.py", line 443, in from_pretrained
    return text_2_image_cls.from_pretrained(pretrained_model_or_path, **kwargs)
  File "D:\Ganu\AIImage\venv\lib\site-packages\huggingface_hub\utils\_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
  File "D:\Ganu\AIImage\venv\lib\site-packages\diffusers\pipelines\pipeline_utils.py", line 745, in from_pretrained
    raise NotImplementedError(
NotImplementedError: auto not supported. Supported strategies are: balanced

The program is as follows:

import logging
from diffusers import AutoPipelineForText2Image, AutoencoderKL
import torch
import numpy as np
import random
import os
from PIL import Image

# =========================
# STEP 0: Logging Setup
# =========================
log_file = "generation_log.txt"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)

logging.info("Initializing...")

# =========================
# STEP 1: Environment Setup
# =========================

torch.cuda.empty_cache()
torch.cuda.ipc_collect()

seed = random.randint(0, 9999999)
torch.manual_seed(seed)
np.random.seed(seed)
logging.info(f"Using seed: {seed}")

# ===============================
# STEP 2: Model and LoRA Setup
# ===============================
logging.info("Loading base model and LoRA weights...")

model_dir = "D:\\Ganu\\AIImage\\huggingface\\kohya_ss\\kohya_ss\\outputs"
lora_weights_path = os.path.join(model_dir, "model")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Custom VAE (fp16-fix)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
        device_map="auto"
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

# If using VAE:
pipeline.vae = vae

pipeline.enable_attention_slicing()
pipeline.enable_vae_slicing()

try:
    pipeline.load_lora_weights(lora_weights_path, weight_name="last.safetensors")
    logging.info("LoRA weights loaded successfully.")
except ValueError as e:
    logging.error("Invalid LoRA checkpoint. Check the format or compatibility.")
    raise e

# =========================
# STEP 3: Prompt Inference
# =========================
text_prompt = (
    "A wide, breathtaking landscape with all real vibrant nature-themed background, lush forests, mountains, and a Doctor standing prominently in the foreground"
)

negative_prompt = (
    "text, letters, words, signage, logos, labels, writing, messy background, busy layout, clutter, double faces, abstract shapes, UI panels with words, overlapping elements, header, footer, top bar, navigation bar, bottom menu, toolbar, top text, website layout, browser frame, button row, page border, UI bar"
)

logging.info(f"Running inference with prompt: {text_prompt}")

try:
    result = pipeline(
        prompt=text_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=30
    )
    generated_image = result.images[0]
    output_path = f"generated_image_{seed}.png"
    generated_image.save(output_path)
    logging.info(f"Image saved to: {output_path}")
    generated_image.show()
except Exception as e:
    logging.error(f"Error during image generation: {e}")
    raise

And I get the same error with "balanced" too!


Hmm… So simply this?

# Custom VAE (fp16-fix)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
        #device_map="auto"
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

# pipeline.enable_model_cpu_offload()

# If using VAE:
pipeline.vae = vae

Still getting a plain black image, but no errors.

import logging
from diffusers import AutoPipelineForText2Image, AutoencoderKL
import torch
import numpy as np
import random
import os
from PIL import Image

# =========================
# STEP 0: Logging Setup
# =========================
log_file = "generation_log.txt"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)

logging.info("Initializing...")

# =========================
# STEP 1: Environment Setup
# =========================

torch.cuda.empty_cache()
torch.cuda.ipc_collect()

seed = random.randint(0, 9999999)
torch.manual_seed(seed)
np.random.seed(seed)
logging.info(f"Using seed: {seed}")

# ===============================
# STEP 2: Model and LoRA Setup
# ===============================
logging.info("Loading base model and LoRA weights...")

model_dir = "D:\\Ganu\\AIImage\\huggingface\\kohya_ss\\kohya_ss\\outputs"
lora_weights_path = os.path.join(model_dir, "model")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Custom VAE (fp16-fix)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
        #device_map="auto"
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

# pipeline.enable_model_cpu_offload()

# If using VAE:
pipeline.vae = vae

pipeline.enable_attention_slicing()
pipeline.enable_vae_slicing()

try:
    pipeline.load_lora_weights(lora_weights_path, weight_name="last.safetensors")
    logging.info("LoRA weights loaded successfully.")
except ValueError as e:
    logging.error("Invalid LoRA checkpoint. Check the format or compatibility.")
    raise e

# =========================
# STEP 3: Prompt Inference
# =========================
text_prompt = (
    "A wide, breathtaking landscape with all real vibrant nature-themed background, lush forests, mountains, and a Doctor standing prominently in the foreground"
)

negative_prompt = (
    "text, letters, words, signage, logos, labels, writing, messy background, busy layout, clutter, double faces, abstract shapes, UI panels with words, overlapping elements, header, footer, top bar, navigation bar, bottom menu, toolbar, top text, website layout, browser frame, button row, page border, UI bar"
)

logging.info(f"Running inference with prompt: {text_prompt}")

try:
    result = pipeline(
        prompt=text_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=30
    )
    generated_image = result.images[0]
    output_path = f"generated_image_{seed}.png"
    generated_image.save(output_path)
    logging.info(f"Image saved to: {output_path}")
    generated_image.show()
except Exception as e:
    logging.error(f"Error during image generation: {e}")
    raise

Hmm… Perhaps a LoRA loading issue…?

import logging
from diffusers import AutoPipelineForText2Image, AutoencoderKL
import torch
import numpy as np
import random
import os
from PIL import Image

# =========================
# STEP 0: Logging Setup
# =========================
log_file = "generation_log.txt"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)

logging.info("Initializing...")

# =========================
# STEP 1: Environment Setup
# =========================

torch.cuda.empty_cache()
torch.cuda.ipc_collect()

seed = random.randint(0, 9999999)
torch.manual_seed(seed)
np.random.seed(seed)
logging.info(f"Using seed: {seed}")

# ===============================
# STEP 2: Model and LoRA Setup
# ===============================
logging.info("Loading base model and LoRA weights...")

model_dir = "D:\\Ganu\\AIImage\\huggingface\\kohya_ss\\kohya_ss\\outputs"
lora_weights_path = os.path.join(model_dir, "model")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Custom VAE (fp16-fix)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

#pipeline.enable_model_cpu_offload()

# If using VAE:
pipeline.vae = vae

pipeline.enable_attention_slicing()
pipeline.enable_vae_slicing()

"""try:
    pipeline.load_lora_weights(lora_weights_path, weight_name="last.safetensors")
    logging.info("LoRA weights loaded successfully.")
except ValueError as e:
    logging.error("Invalid LoRA checkpoint. Check the format or compatibility.")
    raise e
"""

# =========================
# STEP 3: Prompt Inference
# =========================
text_prompt = (
    "A wide, breathtaking landscape with all real vibrant nature-themed background, lush forests, mountains, and a Doctor standing prominently in the foreground"
)

negative_prompt = (
    "text, letters, words, signage, logos, labels, writing, messy background, busy layout, clutter, double faces, abstract shapes, UI panels with words, overlapping elements, header, footer, top bar, navigation bar, bottom menu, toolbar, top text, website layout, browser frame, button row, page border, UI bar"
)

logging.info(f"Running inference with prompt: {text_prompt}")

try:
    result = pipeline(
        prompt=text_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=30
    )
    generated_image = result.images[0]
    output_path = f"generated_image_{seed}.png"
    generated_image.save(output_path)
    logging.info(f"Image saved to: {output_path}")
    generated_image.show()
except Exception as e:
    logging.error(f"Error during image generation: {e}")
    raise

Hello

Getting the same black image without LoRA.

Python & Torch Details:

(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>python
Python 3.10.10 (tags/v3.10.10:aad5f6a, Feb  7 2023, 17:20:36) [MSC v.1929 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>>
>>> # Check if torch was built with CUDA
>>> print("Is CUDA available? :", torch.cuda.is_available())
Is CUDA available? : True
>>> print("CUDA version (torch compiled with):", torch.version.cuda)
CUDA version (torch compiled with): 11.8
>>> print("Torch built with CUDA support:", torch.backends.cuda.is_built())
Torch built with CUDA support: True

Code:

import logging
from diffusers import AutoPipelineForText2Image, AutoencoderKL
import torch
import numpy as np
import random
import os
from PIL import Image

# =========================
# STEP 0: Logging Setup
# =========================
log_file = "generation_log.txt"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file),
        logging.StreamHandler()
    ]
)

logging.info("Initializing...")

# =========================
# STEP 1: Environment Setup
# =========================

torch.cuda.empty_cache()
torch.cuda.ipc_collect()

seed = random.randint(0, 9999999)
torch.manual_seed(seed)
np.random.seed(seed)
logging.info(f"Using seed: {seed}")

# ===============================
# STEP 2: Model and LoRA Setup
# ===============================
logging.info("Loading base model and LoRA weights...")

model_dir = "D:\\Ganu\\AIImage\\huggingface\\kohya_ss\\kohya_ss\\outputs"
lora_weights_path = os.path.join(model_dir, "model")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

# Custom VAE (fp16-fix)
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        variant="fp16",
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

#pipeline.enable_model_cpu_offload()

# If using VAE:
pipeline.vae = vae

pipeline.enable_attention_slicing()
pipeline.enable_vae_slicing()

"""try:
    pipeline.load_lora_weights(lora_weights_path, weight_name="last.safetensors")
    logging.info("LoRA weights loaded successfully.")
except ValueError as e:
    logging.error("Invalid LoRA checkpoint. Check the format or compatibility.")
    raise e
"""

# =========================
# STEP 3: Prompt Inference
# =========================
text_prompt = (
    "A wide, breathtaking landscape with all real vibrant nature-themed background, lush forests, mountains, and a Doctor standing prominently in the foreground"
)

negative_prompt = (
    "text, letters, words, signage, logos, labels, writing, messy background, busy layout, clutter, double faces, abstract shapes, UI panels with words, overlapping elements, header, footer, top bar, navigation bar, bottom menu, toolbar, top text, website layout, browser frame, button row, page border, UI bar"
)

logging.info(f"Running inference with prompt: {text_prompt}")

try:
    result = pipeline(
        prompt=text_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=30
    )
    generated_image = result.images[0]
    output_path = f"generated_image_{seed}.png"
    generated_image.save(output_path)
    logging.info(f"Image saved to: {output_path}")
    generated_image.show()
except Exception as e:
    logging.error(f"Error during image generation: {e}")
    raise

Environment:

1. Libraries

pip list
Package            Version
------------------ ------------------
accelerate         0.21.0
aiofiles           24.1.0
annotated-types    0.7.0
anyio              4.9.0
certifi            2025.1.31
charset-normalizer 3.4.1
click              8.1.8
colorama           0.4.6
deepspeed          0.10.0+f5c834a6
diffusers          0.21.4
exceptiongroup     1.2.2
fastapi            0.115.12
ffmpy              0.5.0
filelock           3.18.0
flash-attention    1.0.0
fsspec             2025.3.2
gradio             5.27.1
gradio_client      1.9.1
groovy             0.1.2
h11                0.16.0
hjson              3.1.0
httpcore           1.0.9
httpx              0.28.1
huggingface-hub    0.16.4
idna               3.10
importlib_metadata 8.6.1
Jinja2             3.1.6
markdown-it-py     3.0.0
MarkupSafe         3.0.2
mdurl              0.1.2
mpmath             1.3.0
mypy_extensions    1.1.0
networkx           3.4.2
ninja              1.11.1.4
numpy              1.23.1
orjson             3.10.16
packaging          25.0
pandas             2.2.3
peft               0.15.2
pillow             11.2.1
pip                25.1.1
psutil             7.0.0
py-cpuinfo         9.0.0
pydantic           1.10.13
pydantic_core      2.33.1
pydub              0.25.1
Pygments           2.19.1
pyre-extensions    0.0.29
python-dateutil    2.9.0.post0
python-multipart   0.0.20
pytz               2025.2
PyYAML             6.0.2
regex              2024.11.6
requests           2.32.3
rich               14.0.0
ruff               0.11.7
safehttpx          0.1.6
safetensors        0.5.3
semantic-version   2.10.0
setuptools         65.5.0
shellingham        1.5.4
six                1.17.0
sniffio            1.3.1
starlette          0.46.2
sympy              1.14.0
tokenizers         0.13.3
tomlkit            0.13.2
torch              2.4.0+cu118
torchaudio         2.7.0+cu118
torchvision        0.22.0+cu118
tqdm               4.67.1
transformers       4.31.0
typer              0.15.3
typing_extensions  4.13.2
typing-inspect     0.9.0
typing-inspection  0.4.0
tzdata             2025.2
urllib3            2.4.0
uvicorn            0.34.2
websockets         15.0.1
xformers           0.0.27.post2+cu118
zipp               3.21.0

2. nvidia-smi output
(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>nvidia-smi
Thu May 15 14:36:50 2025
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.40                 Driver Version: 576.40         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA GeForce GTX 1650      WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   47C    P8              5W /   50W |     698MiB /   4096MiB |      3%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI              PID   Type   Process name                        GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|    0   N/A  N/A            5648    C+G   ...IA app\CEF\NVIDIA Overlay.exe      N/A      |
|    0   N/A  N/A            6664    C+G   ...we\Microsoft.Media.Player.exe      N/A      |
|    0   N/A  N/A            7348    C+G   ...Chrome\Application\chrome.exe      N/A      |
|    0   N/A  N/A            7560    C+G   ...Chrome\Application\chrome.exe      N/A      |
|    0   N/A  N/A            7940    C+G   ....0.3240.64\msedgewebview2.exe      N/A      |
|    0   N/A  N/A            9104    C+G   C:\Windows\explorer.exe               N/A      |
|    0   N/A  N/A            9520    C+G   ...h_cw5n1h2txyewy\SearchApp.exe      N/A      |
|    0   N/A  N/A            9728    C+G   ...ntrolPanel\SystemSettings.exe      N/A      |
|    0   N/A  N/A           11968    C+G   ...h_cw5n1h2txyewy\SearchApp.exe      N/A      |
|    0   N/A  N/A           14064    C+G   ...5n1h2txyewy\TextInputHost.exe      N/A      |
|    0   N/A  N/A           15292    C+G   ...IA app\CEF\NVIDIA Overlay.exe      N/A      |
+-----------------------------------------------------------------------------------------+

Hmm… Or CUDA Toolkit version issue?

accelerate                1.0.1
diffusers                 0.32.2
torch                     2.4.0+cu124
transformers              4.49.0.dev0

The simple code

from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32
).to("cuda")

prompt = "A clear sunny landscape with mountains and a river"
image = pipe(prompt=prompt).images[0]
image.save("test_image.png")

returns the error

(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>python John-Training-15thMay2025.py
[2025-05-15 14:53:54,560] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
D:\Ganu\AIImage\venv\lib\site-packages\deepspeed-0.10.0+f5c834a6-py3.10.egg\deepspeed\runtime\zero\linear.py:49: FutureWarning: `torch.cuda.amp.custom_fwd(args...)` is deprecated. Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.
  def forward(ctx, input, weight, bias=None):
D:\Ganu\AIImage\venv\lib\site-packages\deepspeed-0.10.0+f5c834a6-py3.10.egg\deepspeed\runtime\zero\linear.py:67: FutureWarning: `torch.cuda.amp.custom_bwd(args...)` is deprecated. Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.
  def backward(ctx, grad_output):
Loading pipeline components...: 100%|██████████| 7/7 [00:02<00:00,  2.48it/s]
Traceback (most recent call last):
  File "D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo\John-Training-15thMay2025.py", line 7, in <module>
    ).to("cuda")
  File "D:\Ganu\AIImage\venv\lib\site-packages\diffusers\pipelines\pipeline_utils.py", line 733, in to
    module.to(torch_device, torch_dtype)
  File "D:\Ganu\AIImage\venv\lib\site-packages\transformers\modeling_utils.py", line 1900, in to
    return super().to(*args, **kwargs)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 1174, in to
    return self._apply(convert)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 780, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 780, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 780, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 805, in _apply
    param_applied = fn(param)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 1160, in convert
    return t.to(
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 242.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.41 GiB is allocated by PyTorch, and 262.02 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

and ChatGPT says:

You're running out of VRAM on your GPU (4 GB total, 0 bytes free), and you're trying to load Stable Diffusion XL (SDXL), which requires at least 12 GB of GPU memory, ideally more. The error is expected: SDXL is far too heavy for a 4 GB GPU.

P.S.:

  1. The CPU version still works.
  2. And I remember the GPU version working earlier.
  3. Should I change the CUDA Toolkit version?

Upgrading the CUDA Toolkit is a last resort, so let's try these first. Also, the above error may simply be due to insufficient VRAM…

pip install -U accelerate
pip install diffusers==0.32.2 transformers<=4.48.3
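
Also, if VRAM really is the bottleneck, sequential CPU offload usually lets SDXL run on a 4 GB card, just slowly. A minimal sketch of the simple test script, using enable_sequential_cpu_offload() instead of .to("cuda"):

from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
)

# Stream submodules to the GPU one at a time instead of moving the whole
# pipeline with .to("cuda"); much slower, but needs far less VRAM.
pipe.enable_sequential_cpu_offload()

prompt = "A clear sunny landscape with mountains and a river"
image = pipe(prompt=prompt).images[0]
image.save("test_image.png")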

pip install diffusers==0.32.2 transformers<=4.48.3
The system cannot find the file specified.


Hmm… on Windows, cmd.exe treats "<" as input redirection (hence "The system cannot find the file specified"), so quote the specifier or install them separately:

pip install diffusers==0.32.2
pip install transformers==4.48.3

(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>python John-Training-15thMay2025.py
WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:
PyTorch 2.4.0+cu118 with CUDA 1108 (you have 2.7.0+cu118)
Python 3.10.11 (you have 3.10.10)
Please reinstall xformers (see https://github.com/facebookresearch/xformers)
Memory-efficient attention, SwiGLU, sparse and more won't be available.
Set XFORMERS_MORE_DETAILS=1 for more details
Loading pipeline components...: 100%|██████████| 7/7 [00:02<00:00, 3.32it/s]
Traceback (most recent call last):
  File "D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo\John-Training-15thMay2025.py", line 7, in <module>
    ).to("cuda")
  File "D:\Ganu\AIImage\venv\lib\site-packages\diffusers\pipelines\pipeline_utils.py", line 461, in to
    module.to(device, dtype)
  File "D:\Ganu\AIImage\venv\lib\site-packages\transformers\modeling_utils.py", line 3110, in to
    return super().to(*args, **kwargs)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 1355, in to
    return self._apply(convert)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 915, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 915, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 915, in _apply
    module._apply(fn)
  [Previous line repeated 3 more times]
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 942, in _apply
    param_applied = fn(param)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 1341, in convert
    return t.to(
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.66 GiB is allocated by PyTorch, and 239.73 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


WARNING[XFORMERS]: xFormers can't load C++/CUDA extensions. xFormers was built for:

Oh…

pip uninstall xformers
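
If you want to keep xFormers instead, reinstalling a build that matches the installed torch (2.7.0+cu118 here) should also clear the warning; per the xFormers README, something like:

pip install -U xformers --index-url https://download.pytorch.org/whl/cu118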

(venv) D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo>python John-Training-15thMay2025.py
Loading pipeline components...: 100%|██████████| 7/7 [00:02<00:00, 3.35it/s]
Traceback (most recent call last):
  File "D:\Ganu\AIImage\project\Train-10Images-chatgptParameters\runs\1sstrun-23thApril2025\generation\1stGo\John-Training-15thMay2025.py", line 7, in <module>
    ).to("cuda")
  File "D:\Ganu\AIImage\venv\lib\site-packages\diffusers\pipelines\pipeline_utils.py", line 461, in to
    module.to(device, dtype)
  File "D:\Ganu\AIImage\venv\lib\site-packages\diffusers\models\modeling_utils.py", line 1077, in to
    return super().to(*args, **kwargs)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 1355, in to
    return self._apply(convert)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 915, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 915, in _apply
    module._apply(fn)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 915, in _apply
    module._apply(fn)
  [Previous line repeated 2 more times]
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 942, in _apply
    param_applied = fn(param)
  File "D:\Ganu\AIImage\venv\lib\site-packages\torch\nn\modules\module.py", line 1341, in convert
    return t.to(
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 14.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 10.67 GiB is allocated by PyTorch, and 229.42 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


Hmm…

from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    # torch_dtype=torch.float32  # too large for a 4 GB card
    torch_dtype=torch.float16
).to("cuda")

prompt = "A clear sunny landscape with mountains and a river"
image = pipe(prompt=prompt).images[0]
image.save("test_image.png")

Still getting a plain black image with the following code:

from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    # torch_dtype=torch.float32  # too large for a 4 GB card
    torch_dtype=torch.float16
).to("cuda")

prompt = "A clear sunny landscape with mountains and a river"
image = pipe(prompt=prompt).images[0]
image.save("test_image.png")


I found an old GTX 10x0-related issue…

from diffusers import DiffusionPipeline, AutoencoderKL
import torch

torch.backends.cudnn.benchmark = True # https://github.com/huggingface/diffusers/issues/1556
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to("cuda")
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", vae=vae, variant="fp16", torch_dtype=torch.float16).to("cuda")

prompt = "A clear sunny landscape with mountains and a river"
image = pipe(prompt=prompt).images[0]
image.save("test_image.png")

A green blank image is being created with the above code…


Green…? It's surreal, but it's good to see a change. This might be a CUDA Toolkit version issue after all. I'll do some searching.
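
In the meantime, one way to narrow it down: ask the pipeline for the raw latents (output_type="latent") and check them for NaNs before the VAE decode runs. A minimal sketch with the same model:

from diffusers import DiffusionPipeline
import torch

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

prompt = "A clear sunny landscape with mountains and a river"

# Skip the VAE decode and return the raw latents instead of a PIL image.
latents = pipe(prompt=prompt, output_type="latent").images

# NaN/Inf here means the UNet itself overflows in fp16;
# clean latents would point at the VAE decode step instead.
print("NaNs:", torch.isnan(latents).any().item())
print("Infs:", torch.isinf(latents).any().item())

If the latents come back clean, the usual next step is keeping the fp16-fix VAE in place or forcing the decode to run in float32 (diffusers exposes vae.config.force_upcast for this).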
