import speech_recognition as sr
from transformers import pipeline
import numpy as np
model = pipeline(model="facebook/wav2vec2-base-960h")
# obtain audio from the microphone
r = sr.Recognizer()
with sr.Microphone() as source:
print("Say something!")
audio = r.listen(source)
#convert audio buffer to numpy array
data = np.frombuffer(audio.get_raw_data())
output = model(data)
print(output)
Downloading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.79k/2.79k [00:00<00:00, 1.69MB/s]
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Say something!
C:\Users\nandurisai.venkatara\Anaconda3\envs\PT-BR-Sentiment\lib\site-packages\numpy\core\_methods.py:179: RuntimeWarning: invalid value encountered in reduce
ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)
C:\Users\nandurisai.venkatara\Anaconda3\envs\PT-BR-Sentiment\lib\site-packages\numpy\core\_methods.py:212: RuntimeWarning: invalid value encountered in reduce
arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)
{'text': ''}