I tried to use the byT5 text encoder with the diffusers StableDiffusionPipeline.
Here is the code:
def add_byt5_text_encoder_to_pipeline(pipeline: DiffusionPipeline) -> DiffusionPipeline:
text_encoder: T5ForConditionalGeneration = T5ForConditionalGeneration.from_pretrained("google/byt5-small")
tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained("google/byt5-small")
pipeline.text_encoder = text_encoder
pipeline.tokenizer = tokenizer
return pipeline
But i get the following error:
ile "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 564, in _run_script
exec(code, module.__dict__)
File "C:\dev\repos\HandmadeAI\stable-diffusion-streamlit\pages\Txt2Img.py", line 109, in <module>
run()
File "C:\dev\repos\HandmadeAI\stable-diffusion-streamlit\pages\Txt2Img.py", line 105, in run
layout(pipeline)
File "C:\dev\repos\HandmadeAI\stable-diffusion-streamlit\pages\Txt2Img.py", line 80, in layout
generated_images = txt2img(pipeline,
File "C:\dev\repos\HandmadeAI\stable-diffusion-streamlit\pages\Txt2Img.py", line 54, in txt2img
generated_images = pipeline(prompt=prompt,
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\torch\autograd\grad_mode.py", line 34, in decorate_context
return func(*args, **kwargs)
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\diffusers\pipelines\stable_diffusion\pipeline_stable_diffusion.py", line 568, in __call__
prompt_embeds = self._encode_prompt(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\diffusers\pipelines\stable_diffusion\pipeline_stable_diffusion.py", line 262, in _encode_prompt
text_inputs = self.tokenizer(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils_base.py", line 2523, in __call__
encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils_base.py", line 2629, in _call_one
return self.encode_plus(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils_base.py", line 2702, in encode_plus
return self._encode_plus(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils.py", line 652, in _encode_plus
return self.prepare_for_model(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils_base.py", line 3182, in prepare_for_model
encoded_inputs = self.pad(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils_base.py", line 2987, in pad
encoded_inputs = self._pad(
File "C:\Users\Chris\anaconda3\envs\sd-pytorch2\lib\site-packages\transformers\tokenization_utils_base.py", line 3374, in _pad
encoded_inputs["attention_mask"] = encoded_inputs["attention_mask"] + [0] * difference
OverflowError: cannot fit 'int' into an index-sized integer
What is the problem? Is there another way to use the ByT5 encoder with a diffusers pipeline?
I would be really grateful for help.