I’m trying to use StableDiffusionPipeline to create a first image from a prompt, and then turn it into a video by running StableDiffusionImg2ImgPipeline for 200 frames. The StableDiffusionPipeline image is fine, but no matter what settings I use, the Img2Img pipeline always devolves into craziness.
I’ve tried many different strengths and numbers of inference steps, but they all end up the same way: for some reason, the frames usually become heavily tinted purple. Here is an example:
My current code is:
# Render one frame from the text prompt, then repeatedly feed the most recent
# accepted frame back through the img2img pipeline and append every accepted
# frame to an MP4 file.
#
# Relies on names defined outside this snippet: model_id, token, prompt,
# inference_steps, strength, frames, log, log_line, output, st, cv, np.
torch.cuda.empty_cache()
images = []
log_line(log, 'Generate button clicked')
nsfw = False

# --- Frame 1: text-to-image -------------------------------------------------
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.to('cuda')
with autocast('cuda'):
    data = pipe(prompt, num_inference_steps=inference_steps)
log_line(log, data)
image = data.images[0]
images.append(image)
if data.nsfw_content_detected[0]:
    st.error("Prompt returned nsfw")
    nsfw = True
log_line(log, 'Frame 1 returned')

# Drop the text-to-image pipeline before loading the second model so both
# sets of weights are not resident on the GPU at the same time.
del pipe
torch.cuda.empty_cache()

# Assumes data.images holds PIL images (the diffusers default): PIL reports
# size as (width, height), which is the order cv.VideoWriter expects, so the
# tuple must not be swapped.
size = (image.size[0], image.size[1])
filename = f'{uuid.uuid4()}.mp4'
log_line(log, f'Creating file {filename}')
video = cv.VideoWriter(filename, cv.VideoWriter_fourcc(*'mp4v'), 10, size)
try:
    # PIL arrays are RGB while OpenCV writes BGR; convert so red and blue are
    # not swapped in the encoded video.
    video.write(cv.cvtColor(np.array(image), cv.COLOR_RGB2BGR))

    # --- Remaining frames: image-to-image -----------------------------------
    # Load the weights once, in half precision, passing the auth token in the
    # same call (chaining two from_pretrained calls loads the model twice).
    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
        model_id,
        use_auth_token=token,
        torch_dtype=torch.float16,
    )
    pipe.to('cuda')

    # NOTE(review): every frame is generated from the previous output, so any
    # colour drift or artefact compounds over the run; a lower strength or
    # periodically re-seeding from frame 1 may be needed - confirm by testing.
    for n in range(frames - 1):
        with autocast("cuda"):
            data = pipe(
                prompt,
                init_image=image,
                num_inference_steps=inference_steps,
                strength=strength,
            )
        log_line(log, f'Frame {n+2} rendered - steps: {inference_steps}, strength={strength}')

        if data.nsfw_content_detected[0]:
            # Keep the previous frame as the next init image and write nothing.
            log_line(log, f'Frame {n+2} skipped for nsfw')
            continue

        image = data.images[0]
        images.append(image)
        output.empty()
        output.image(images)
        video.write(cv.cvtColor(np.array(image), cv.COLOR_RGB2BGR))
finally:
    # Always finalise the container, even if generation fails part-way.
    video.release()