Hello
I have been working through the HF Learn tutorial on Transformers for Audio and ran into an issue in Section 5 (evaluation); it looks like the tutorial code may be out of date. The failure occurs when iterating over the PipelineIterator object returned by the pipeline.
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset
from transformers import pipeline
import torch
############### READING A DATASET ###############
from datasets import load_dataset
# Load the "test" split of the "dv" configuration of Common Voice 13.0.
# NOTE(review): trust_remote_code=True presumably lets the dataset's own
# loading script run locally — confirm the source is trusted.
common_voice_test = load_dataset(
    "mozilla-foundation/common_voice_13_0",
    "dv",
    split="test",
    trust_remote_code=True,
)
############### PREPARING WHISPER PIPELINE ###############
# Pick the device and dtype at runtime instead of hard-coding the GPU:
# requesting "cuda:0" on a machine without CUDA fails, and float32 is the
# safe default when running on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

# Build the speech-recognition pipeline around the Whisper "small" checkpoint.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    torch_dtype=torch_dtype,
    device=device,
)
############### PREPARING PIPELINE ITERATOR ###############
# Feed only the "audio" column of the test split to the pipeline.
audio_column = KeyDataset(common_voice_test, "audio")

# Calling the pipeline on a dataset returns an object that is iterated later;
# nothing is consumed from it here.
# NOTE(review): the IndexError reported with this script is raised while a
# batch is being collated (`_pad` in transformers/pipelines/base.py) — TODO
# confirm whether batch_size=1 or a different transformers release avoids it.
inference_options = {
    "max_new_tokens": 128,
    "generate_kwargs": {"task": "transcribe"},
    "batch_size": 16,
}
pipeline_iterator = pipe(audio_column, **inference_options)

# Report the dataset size, the batch size and the iterator length
# (the posted output shows 2212 samples and an iterator length of 139).
print(
    "Size of common_voice_test",
    len(common_voice_test),
    "Batch size",
    16,
    "Size of pipeline_iterator",
    len(pipeline_iterator),
)
############### USING PIPELINE ITERATOR ###############
# Collect the transcription text of every prediction, in iteration order.
all_predictions = []
# run streamed inference
for prediction in pipeline_iterator:
    # The loop body must be indented; at column 0 the script does not parse.
    all_predictions.append(prediction["text"])
The “USING PIPELINE ITERATOR” section does not run: iterating over the pipeline iterator raises an IndexError. Here is the output and the traceback:
Size of common_voice_test 2212 Batch size 16 Size of pipeline_iterator 139
Traceback (most recent call last):
File "c:\Users\crass\Downloads\HO1.py", line 39, in <module>
for prediction in pipeline_iterator:
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\transformers\pipelines\pt_utils.py", line 124, in __next__
item = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\transformers\pipelines\pt_utils.py", line 269, in __next__
processed = self.infer(next(self.iterator), **self.params)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\torch\utils\data\dataloader.py", line 631, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\torch\utils\data\dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\torch\utils\data\_utils\fetch.py", line 42, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\transformers\pipelines\base.py", line 194, in inner
padded[key] = _pad(items, key, _padding_value, padding_side)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\transformers\pipelines\base.py", line 100, in _pad
max_length = max(item[key].shape[1] for item in items)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\crass\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\transformers\pipelines\base.py", line 100, in <genexpr>
max_length = max(item[key].shape[1] for item in items)
~~~~~~~~~~~~~~~^^^
IndexError: tuple index out of range
Note that I am using transformers version 4.42.4; should I perhaps downgrade?
Thank you