Hello All,
I am trying to run parallel models on Gradio for the ASR.
I am loading the base model and I have 2 transcribe methods for each model, but in this case it is the same model for testing purposes. When I run it with the single interface on the Parallel method, it works fine.
iface = Parallel(io1,
theme='huggingface',
inputs = gr.Audio(source="upload", type="filepath"),
outputs=output_1
)
But when I run 2 or more interfaces with models in it, I get the following error.
iface = Parallel(io1,io1,
theme='huggingface',
inputs = gr.Audio(source="upload", type="filepath"),
outputs=output_1
)
iface.launch()
The implementation of io1 is as follows:
def transcribe1(audio):
audio = whisper.load_audio(audio)
audio = whisper.pad_or_trim(audio)
result_tr = model_base.transcribe(audio ,task='translate')
return result_tr['text']
io1 = gr.Interface(transcribe1,'audio','text')
The error I get when running 2 models is as follows:
/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/gradio/helpers.py:637: UserWarning: Unexpected argument. Filling with None.
warnings.warn("Unexpected argument. Filling with None.")
/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/transcribe.py:114: UserWarning: FP16 is not supported on CPU; using FP32 instead
warnings.warn("FP16 is not supported on CPU; using FP32 instead")
Traceback (most recent call last):
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/gradio/routes.py", line 393, in run_predict
output = await app.get_blocks().process_api(
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/gradio/blocks.py", line 1069, in process_api
result = await self.call_function(
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/gradio/blocks.py", line 876, in call_function
prediction = await fn(*processed_input)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/gradio/mix.py", line 42, in parallel_fn
return_values_with_durations = await asyncio.gather(
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/gradio/blocks.py", line 878, in call_function
prediction = await anyio.to_thread.run_sync(
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/anyio/to_thread.py", line 28, in run_sync
return await get_asynclib().run_sync_in_worker_thread(func, *args, cancellable=cancellable,
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 818, in run_sync_in_worker_thread
return await future
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 754, in run
result = context.run(func, *args)
File "/var/folders/fx/_428pk0566d75r4q1gvhgbh80000gn/T/ipykernel_38387/2430703371.py", line 42, in transcribe2
result_tr = model_base.transcribe(audio ,task='translate')
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/transcribe.py", line 229, in transcribe
result: DecodingResult = decode_with_fallback(mel_segment)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/transcribe.py", line 164, in decode_with_fallback
decode_result = model.decode(segment, options)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/decoding.py", line 811, in decode
result = DecodingTask(model, options).run(mel)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/decoding.py", line 724, in run
tokens, sum_logprobs, no_speech_probs = self._main_loop(audio_features, tokens)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/decoding.py", line 673, in _main_loop
logits = self.inference.logits(tokens, audio_features)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/decoding.py", line 157, in logits
return self.model.decoder(tokens, audio_features, kv_cache=self.kv_cache)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/model.py", line 211, in forward
x = block(x, xa, mask=self.mask, kv_cache=kv_cache)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/model.py", line 136, in forward
x = x + self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)[0]
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/model.py", line 90, in forward
wv, qk = self.qkv_attention(q, k, v, mask)
File "/usr/local/anaconda3/envs/aimlstuff/lib/python3.10/site-packages/whisper/model.py", line 104, in qkv_attention
qk = qk + mask[:n_ctx, :n_ctx]
RuntimeError: The size of tensor a (6) must match the size of tensor b (3) at non-singleton dimension 3