I'm trying to run the code below, but I get an error that says: ERROR | Input type (torch.cuda.HalfTensor) and weight type (torch.cuda.FloatTensor) should be the same
I'm running this on a GPU machine (4x GPU 64). Please help.
import torch
from huggingface_hub import hf_hub_download
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from transformers import AutoFeatureExtractor
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
from insightface.app import FaceAnalysis
from insightface.utils import face_align
import io
import requests
import json
import cv2
import numpy
# Torch device string used when moving the pipeline and building the IP-Adapter wrapper.
device = "cuda"
class EndpointHandler():
    """Endpoint handler that generates face-conditioned images with IP-Adapter FaceID Plus.

    ``__init__`` loads every model once; ``__call__`` serves one request:
    it downloads the reference photo, extracts the face embedding and an
    aligned face crop, runs the generator and uploads the results.
    """

    def __init__(self, path=""):
        """Load the Stable Diffusion pipeline, the face analyser and the IP-Adapter.

        Args:
            path: Unused by this handler; kept so the constructor signature
                stays compatible with whatever instantiates it.
        """
        self.v2 = True
        base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
        vae_model_path = "stabilityai/sd-vae-ft-mse"
        image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
        ip_plus_ckpt = hf_hub_download(
            repo_id="h94/IP-Adapter-FaceID",
            filename="ip-adapter-faceid-plus_sd15.bin" if not self.v2 else "ip-adapter-faceid-plusv2_sd15.bin",
            repo_type="model",
        )
        safety_model_id = "CompVis/stable-diffusion-safety-checker"
        safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
        # The checker is handed to the pipeline as a ready-made component, so it
        # keeps the dtype it was loaded with. Left in float32 it receives
        # half-precision inputs from the fp16 pipeline and fails with
        # "Input type (torch.cuda.HalfTensor) and weight type
        # (torch.cuda.FloatTensor) should be the same". Cast it explicitly,
        # exactly as is done for the VAE below.
        safety_checker = StableDiffusionSafetyChecker.from_pretrained(
            safety_model_id
        ).to(dtype=torch.float16)
        noise_scheduler = DDIMScheduler(
            num_train_timesteps=1000,
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
            steps_offset=1,
        )
        vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
        self.pipe = StableDiffusionPipeline.from_pretrained(
            base_model_path,
            torch_dtype=torch.float16,
            scheduler=noise_scheduler,
            vae=vae,
            feature_extractor=safety_feature_extractor,
            safety_checker=safety_checker,
        )
        self.pipe.to(device)
        self.face_app = FaceAnalysis(
            name="buffalo_l",
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )
        self.face_app.prepare(ctx_id=0, det_size=(640, 640))
        self.processor = IPAdapterFaceIDPlus(self.pipe, image_encoder_path, ip_plus_ckpt, device)

    @staticmethod
    def _load_payload(request):
        """Return the request parameters as a decoded object.

        ``request["inputs"]`` may be a JSON-encoded string (the original
        contract) or an already-decoded object, which is returned unchanged.
        """
        inputs = request["inputs"]
        if isinstance(inputs, (str, bytes, bytearray)):
            return json.loads(inputs)
        return inputs

    def __call__(self, request):
        """Generate images for one request and return their upload result.

        Args:
            request: Mapping with an ``"inputs"`` entry holding the keys
                ``image_url``, ``prompt``, ``likeness_strength``,
                ``face_strength`` and ``negative_prompt``.

        Returns:
            ``{"urls": ...}`` where the value is whatever
            ``upload_to_crafto`` returns.

        Raises:
            KeyError: if a required key is missing from the payload.
            ValueError: if the downloaded bytes are not a decodable image or
                no face is detected in it.
        """
        print(request)
        data = self._load_payload(request)
        image_url = data["image_url"]
        prompt = data["prompt"]
        # NOTE(review): both strengths are required in the payload but are not
        # forwarded to generate(), which runs with a fixed s_scale=1.0 --
        # confirm whether they should be wired in.
        likeness_strength = data["likeness_strength"]
        face_strength = data["face_strength"]
        negative_prompt = data["negative_prompt"]
        image = download_image(image_url)
        # Force a 3-channel decode: the "unchanged" flag (-1) would keep an
        # alpha channel or a single grayscale channel from the source file.
        face = cv2.imdecode(numpy.frombuffer(image, numpy.uint8), cv2.IMREAD_COLOR)
        if face is None:
            raise ValueError(f"Could not decode an image from {image_url!r}")
        faces = self.face_app.get(face)
        if not faces:
            raise ValueError(f"No face detected in the image at {image_url!r}")
        # Only the first detected face is used.
        faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
        face_image = face_align.norm_crop(face, landmark=faces[0].kps, image_size=224)  # you can also segment the face
        total_negative_prompt = f"{negative_prompt} naked, bikini, skimpy, scanty, bare skin, lingerie, swimsuit, exposed, see-through"
        print('Genrating image')
        generated_images = self.processor.generate(
            prompt=prompt,
            negative_prompt=total_negative_prompt,
            face_image=face_image,
            faceid_embeds=faceid_embed,
            shortcut=self.v2,
            s_scale=1.0,
            width=512,
            height=512,
            num_inference_steps=30,
            num_samples=2,
        )
        print('generated images', generated_images)
        urls = upload_to_crafto(generated_images)
        return {"urls": urls}
def upload_to_crafto(images, timeout=60):
    """Encode ``images`` as JPEG and upload them in one multipart POST.

    Args:
        images: Iterable of image objects exposing ``save(fp, format=...)``
            (presumably PIL images from the pipeline -- confirm with caller).
        timeout: Seconds to wait for the upload service before giving up.

    Returns:
        The decoded JSON body of the upload response.

    Raises:
        requests.HTTPError: if the service answers with an error status.
        requests.RequestException: on connection failures or timeouts.
    """
    image_data = {}
    for i, image in enumerate(images):
        with io.BytesIO() as buffer:
            image.save(buffer, format="JPEG")  # Adjust format as needed
            # Copy the bytes out while the buffer is still open.
            image_data[f"image_{i}"] = (f"image_{i}.jpg", buffer.getvalue(), "image/jpeg")
    res = requests.post(
        "https://xxxx.xxx/crafto/v1.0/media/political/upload",
        files=image_data,
        timeout=timeout,
    )
    # Fail loudly instead of handing an error payload back as the result.
    res.raise_for_status()
    # Parse the body once and reuse it for both logging and the return value.
    payload = res.json()
    print(payload)
    return payload
def download_image(image_url, timeout=30):
    """Download ``image_url`` and return the raw response body as bytes.

    Args:
        image_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failures or timeouts.
    """
    # Without a timeout a stalled server would block the handler indefinitely.
    image_res = requests.get(image_url, timeout=timeout)
    # Do not pass an HTML/JSON error page on as if it were image bytes.
    image_res.raise_for_status()
    return image_res.content