Hmm… So simply this?
import logging

import torch
from diffusers import AutoencoderKL, AutoPipelineForText2Image

# Optional: custom VAE. The stock SDXL VAE can produce NaNs in float16;
# madebyollin/sdxl-vae-fp16-fix is a patched version that is fp16-safe.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
).to("cuda")

try:
    pipeline = AutoPipelineForText2Image.from_pretrained(
        model_id,  # assumed to be defined earlier, e.g. an SDXL checkpoint id
        torch_dtype=torch.float16,
        variant="fp16",
        # device_map="auto"
    ).to("cuda")
    logging.info("Pipeline loaded to GPU with float16.")
except Exception as e:
    logging.error(f"Failed to load model pipeline: {e}")
    raise

# If VRAM is tight, call this *instead of* .to("cuda") above:
# pipeline.enable_model_cpu_offload()

# Swap in the fp16-safe VAE:
pipeline.vae = vae
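
Yes, that's the core of it. For completeness, here is a minimal usage sketch once the pipeline is loaded; the prompt and sampling parameters below are illustrative, not from the original:

# Minimal sketch: generate one image with the loaded pipeline.
# Prompt and parameters are placeholders; tune them for your use case.
image = pipeline(
    prompt="a photo of an astronaut riding a horse on mars",
    num_inference_steps=30,
    guidance_scale=7.0,
).images[0]
image.save("output.png")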