Error when trying to run IP-Adapter-Face-ID using inference endpoints

I m trying to run the below code but getting error that says ERROR | Input type (torch.cuda.HalfTensor) and weight type (torch.cuda.FloatTensor) should be the same

I m running this on a GPU machine 4x GPU 64. Please help

import torch
from huggingface_hub import hf_hub_download
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from transformers import AutoFeatureExtractor
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
from insightface.app import FaceAnalysis
from insightface.utils import face_align
import io
import requests
import json
import cv2
import numpy

device = "cuda"

class EndpointHandler():
    def __init__(self, path=""):
        self.v2 = True
        base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
        vae_model_path = "stabilityai/sd-vae-ft-mse"
        image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
        ip_plus_ckpt = hf_hub_download(repo_id="h94/IP-Adapter-FaceID", filename="ip-adapter-faceid-plus_sd15.bin" if not self.v2 else "ip-adapter-faceid-plusv2_sd15.bin", repo_type="model")
        safety_model_id = "CompVis/stable-diffusion-safety-checker"
        safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
        safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
        noise_scheduler = DDIMScheduler(
            num_train_timesteps=1000,
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
            steps_offset=1,
        )
        vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)

        self.pipe = StableDiffusionPipeline.from_pretrained(
            base_model_path,
            torch_dtype=torch.float16,
            scheduler=noise_scheduler,
            vae=vae,
            feature_extractor=safety_feature_extractor,
            safety_checker=safety_checker
        )
        self.pipe.to(device)
        self.face_app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
        self.face_app.prepare(ctx_id=0, det_size=(640, 640))
        self.processor = IPAdapterFaceIDPlus(self.pipe, image_encoder_path, ip_plus_ckpt, device)
    
    def __call__(self, request):
        print(request)
        data = json.loads(request["inputs"])
        image_url = data["image_url"]
        prompt = data["prompt"]
        likeness_strength = data["likeness_strength"]
        face_strength = data["face_strength"]
        negative_prompt = data["negative_prompt"]

        image = download_image(image_url)
        
        face = cv2.imdecode(numpy.frombuffer(image, numpy.uint8), -1)
        faces = self.face_app.get(face)

        faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
        face_image = face_align.norm_crop(face, landmark=faces[0].kps, image_size=224) # you can also segment the face
        total_negative_prompt = f"{negative_prompt} naked, bikini, skimpy, scanty, bare skin, lingerie, swimsuit, exposed, see-through"

        print('Genrating image')
        generated_images = self.processor.generate(
        prompt=prompt, negative_prompt=total_negative_prompt,face_image=face_image, faceid_embeds=faceid_embed,
        shortcut=self.v2, s_scale=1.0, width=512, height=512, num_inference_steps=30, num_samples=2
        )
        print('generated images', generated_images)
        urls = upload_to_crafto(generated_images)
        return {"urls":urls}
    
def upload_to_crafto(images):
    image_data = {}
    for i, image in enumerate(images):
        with io.BytesIO() as buffer:
            image.save(buffer, format="JPEG")  # Adjust format as needed
            image_data[f"image_{i}"] = (f"image_{i}.jpg", buffer.getvalue(), "image/jpeg")

    res = requests.post(
        "https://xxxx.xxx/crafto/v1.0/media/political/upload",
        files=image_data
    )
    print(res.json())
    return res.json()
    
def download_image(image_url):
    image_res = requests.get(image_url)
    return image_res.content