1650Ti doesn’t support fp16…?
So it won’t work, right? I should go ahead with the CPU.
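Before settling on the CPU, it may be worth printing what the card actually reports; for SDXL, total VRAM is often as much of a constraint as fp16 support. A minimal sketch (not from the thread above, just a quick check):

import torch

if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    print(f"{props.name}: compute capability {props.major}.{props.minor}, "
          f"{props.total_memory / 1024**3:.1f} GB VRAM")
else:
    print("No CUDA device visible; running on the CPU is the only option.")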
import logging
from diffusers import DiffusionPipeline
import torch

# =========================
# STEP 0: Logging Setup
# =========================

# Remove all existing logging handlers
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

log_file = "generation_log.txt"

# Set up logging to file (and optionally console)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_file, mode='w'),
        logging.StreamHandler()  # Remove this line if you want logs only in the file
    ]
)

# Enable DEBUG logging for all major libraries
libraries_to_log = [
    "accelerate", "aiofiles", "annotated_types", "anyio", "certifi", "charset_normalizer",
    "click", "colorama", "deepspeed", "diffusers", "exceptiongroup", "fastapi", "ffmpy",
    "filelock", "flash_attention", "fsspec", "gradio", "gradio_client", "groovy", "h11",
    "hjson", "httpcore", "httpx", "huggingface_hub", "idna", "importlib_metadata", "Jinja2",
    "markdown_it_py", "MarkupSafe", "mdurl", "mpmath", "mypy_extensions", "networkx",
    "ninja", "numpy", "orjson", "packaging", "pandas", "peft", "pillow", "psutil", "py-cpuinfo",
    "pydantic", "pydantic_core", "pydub", "Pygments", "pyre_extensions", "python_dateutil",
    "python_multipart", "pytz", "PyYAML", "regex", "requests", "rich", "ruff", "safehttpx",
    "safetensors", "semantic_version", "setuptools", "shellingham", "six", "sniffio",
    "starlette", "sympy", "tokenizers", "tomlkit", "torch", "torchaudio", "torchvision",
    "tqdm", "transformers", "typer", "typing_extensions", "typing_inspect",
    "typing_inspection", "tzdata", "urllib3", "uvicorn", "websockets", "zipp"
]
for lib in libraries_to_log:
    logging.getLogger(lib).setLevel(logging.DEBUG)

# =========================
# STEP 1: Model Load & Image Generation
# =========================
logger = logging.getLogger(__name__)

logger.info("Loading the diffusion pipeline...")
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32
).to("cpu")
logger.info("Pipeline loaded successfully.")

prompt = "A clear sunny landscape with mountains and a river"
logger.info(f"Generating image for prompt: {prompt}")
image = pipe(prompt=prompt).images[0]
image.save("test_image.png")
logger.info("Image saved to test_image.png")
Or maybe we should write some code to make better use of the GPU while keeping everything as float32…
For example, quantizing, or placing only the VAE on the GPU…
Well, if speed becomes an issue, we’ll just do some trial and error.
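On the “only the VAE on the GPU” idea, here is one hedged sketch of how it could be wired up (untested here): keep the whole pipeline on the CPU, request raw latents with output_type="latent", then move just the VAE to CUDA and mirror the pipeline’s own decode step with the VAE scaling factor. The prompt and file name are placeholders.

import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32,
    use_safetensors=True
)  # everything stays on the CPU for the denoising loop

# 1) Denoise on the CPU, but ask for raw latents instead of a decoded image.
latents = pipe(
    "A clear sunny landscape with mountains and a river",
    num_inference_steps=20,
    width=768,
    height=768,
    output_type="latent"
).images

# 2) Move only the VAE to the GPU, decode there, then convert to PIL.
pipe.vae.to("cuda")
with torch.no_grad():
    decoded = pipe.vae.decode(latents.to("cuda") / pipe.vae.config.scaling_factor).sample
image = pipe.image_processor.postprocess(decoded.cpu(), output_type="pil")[0]
image.save("vae_on_gpu.png")
pipe.vae.to("cpu")  # optional: release the VRAM again

The decode is a single pass, while the 20 UNet steps still run on the CPU, so the saving is modest; this mostly shows the mechanics.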
ChatGPT improved the code, and now there is some result, but it’s imperfect.
[image attachment: generated result]
Code
from flask import Flask, send_file, request
from diffusers import StableDiffusionXLPipeline
import torch, os, numpy as np, random, logging

app = Flask(__name__)

model_dir = "D:\\Ganu\\AIImage\\apache-webserver\\Apache24\\htdocs"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

pipeline = None

def load_pipeline():
    global pipeline
    if pipeline is not None:
        return
    print(f"[INFO] Loading full SDXL pipeline on {device} with float32...")
    pipeline_local = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float32,
        use_safetensors=True
    ).to(device)
    pipeline_local.enable_attention_slicing()
    # SDXL ships without a safety checker, so this attribute is effectively a no-op.
    pipeline_local.safety_checker = lambda images, **kwargs: (images, False)
    pipeline_local.set_progress_bar_config(disable=True)
    pipeline = pipeline_local
    logging.info(f"[INFO] Full pipeline loaded on {device}.")

@app.route("/")
def home():
    return """
    <html>
      <head><title>SDXL Generator</title></head>
      <body style="font-family: sans-serif;">
        <h1>Stable Diffusion XL (float32)</h1>
        <form action="/generate" method="get">
          Prompt: <input type="text" name="prompt" size="60">
          <input type="submit" value="Generate">
        </form>
        <p>Try: <a href="/generate?prompt=Futuristic+Indian+market">Futuristic Indian market</a></p>
      </body>
    </html>
    """

@app.route("/generate")
def generate_image():
    load_pipeline()
    seed = request.args.get("seed", default=random.randint(0, 9999999), type=int)
    torch.manual_seed(seed)
    np.random.seed(seed)

    prompt = request.args.get("prompt", default="", type=str).strip()
    if not prompt:
        return "Error: prompt required", 400

    width = request.args.get("width", default=512, type=int)
    height = request.args.get("height", default=512, type=int)

    # Snap requested dimensions to multiples of 64 and cap them at 768 px.
    def round_to_64(x):
        return max(64, int(round(x / 64)) * 64)
    width = min(round_to_64(width), 768)
    height = min(round_to_64(height), 768)

    negative_prompt = (
        "text, watermark, blurry, deformed, double face, bad proportions, UI elements"
    )

    print(f"[INFO] Generating image (seed={seed}, size={width}x{height})")
    result = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        guidance_scale=7.5,
        num_inference_steps=20
    )
    image = result.images[0]

    output_dir = os.path.join(model_dir, "outputs")
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"generated_image_{seed}.png")
    image.save(output_path)
    print(f"[INFO] Image saved: {output_path}")

    return send_file(output_path, mimetype='image/png')

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    app.run(host="127.0.0.1", port=80, debug=True)
This may reduce VRAM consumption a little.
pipeline_local.enable_attention_slicing()
pipeline_local.enable_model_cpu_offload() # this
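For reference, a hedged sketch of how that combination is usually set up (it needs the accelerate package and a CUDA device; enable_model_cpu_offload() manages device placement itself, so the explicit .to(device) call is dropped):

import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float32,
    use_safetensors=True
)
pipe.enable_attention_slicing()    # smaller attention chunks -> lower peak VRAM
pipe.enable_model_cpu_offload()    # keeps weights in RAM, moves each submodule to the GPU only while it runs

image = pipe(
    "A clear sunny landscape with mountains and a river",
    num_inference_steps=20,
    width=768,
    height=768
).images[0]
image.save("offload_test.png")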