Program not working on GPU but works on CPU

The GTX 1650 Ti doesn’t properly support fp16, does it?

So it won’t work on the GPU, right? I should go ahead with the CPU.

import logging
from diffusers import DiffusionPipeline
import torch

# =========================
# STEP 0: Logging Setup
# =========================

# Start from a clean slate: detach every handler currently attached to the
# root logger. We iterate over a copy because removeHandler() mutates the list.
root_logger = logging.root
for existing_handler in list(root_logger.handlers):
    root_logger.removeHandler(existing_handler)

log_file = "generation_log.txt"

# Route DEBUG-and-above records to the log file (truncated on every run via
# mode='w') and to a stream handler as well.
file_handler = logging.FileHandler(log_file, mode='w')
console_handler = logging.StreamHandler()  # Leave this out of `handlers` for file-only logs
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[file_handler, console_handler],
)

# Enable DEBUG logging for all major libraries.
#
# Logger names follow the *importable package* name (libraries call
# logging.getLogger(__name__)), not the PyPI distribution name. The entries
# below therefore use import names, e.g. "PIL" rather than "pillow" and "yaml"
# rather than "PyYAML". Setting the level on a package's top-level logger also
# governs its child loggers (such as "PIL.Image") unless they set their own.
libraries_to_log = [
    "accelerate", "aiofiles", "annotated_types", "anyio", "certifi", "charset_normalizer",
    "click", "colorama", "deepspeed", "diffusers", "exceptiongroup", "fastapi", "ffmpy",
    "filelock", "flash_attn", "fsspec", "gradio", "gradio_client", "groovy", "h11",
    "hjson", "httpcore", "httpx", "huggingface_hub", "idna", "importlib_metadata", "jinja2",
    "markdown_it", "markupsafe", "mdurl", "mpmath", "mypy_extensions", "networkx",
    "ninja", "numpy", "orjson", "packaging", "pandas", "peft", "PIL", "psutil", "cpuinfo",
    "pydantic", "pydantic_core", "pydub", "pygments", "pyre_extensions", "dateutil",
    # NOTE(review): python-multipart appears to import as "multipart" in older
    # releases and "python_multipart" in newer ones, so both are listed - confirm.
    "python_multipart", "multipart",
    "pytz", "yaml", "regex", "requests", "rich", "ruff", "safehttpx",
    "safetensors", "semantic_version", "setuptools", "shellingham", "six", "sniffio",
    "starlette", "sympy", "tokenizers", "tomlkit", "torch", "torchaudio", "torchvision",
    "tqdm", "transformers", "typer", "typing_extensions", "typing_inspect",
    "typing_inspection", "tzdata", "urllib3", "uvicorn", "websockets", "zipp",
]

# Explicitly pin each library's top-level logger to DEBUG so that a library
# which lowers its own verbosity does not hide records.
for lib in libraries_to_log:
    logging.getLogger(lib).setLevel(logging.DEBUG)

# =========================
# STEP 1: Model Load & Image Generation
# =========================

logger = logging.getLogger(__name__)

# Load the SDXL base checkpoint with float32 weights, then move the whole
# pipeline onto the CPU.
logger.info("Loading the diffusion pipeline...")
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32,
)
pipe = pipe.to("cpu")
logger.info("Pipeline loaded successfully.")

# Run a single text-to-image generation and keep the first returned image.
prompt = "A clear sunny landscape with mountains and a river"
logger.info(f"Generating image for prompt: {prompt}")
result = pipe(prompt=prompt)
image = result.images[0]

# Write the generated image to the working directory.
image.save("test_image.png")
logger.info("Image saved to test_image.png")

1 Like

Alternatively, we could write some code that makes better use of the GPU while keeping the model in float32.
For example, we could quantize the model or place only the VAE on the GPU.
If speed becomes an issue, we can experiment with these options by trial and error.