The program is able to record the sound but, I am unable to exit the process from recording the audio.
import sounddevice as sd
import soundfile as sf
import time
import numpy as np
from transformers import pipeline
from pynput import keyboard as key
import queue
import sys
import tempfile
import os
assert np
whisper = pipeline(
'automatic-speech-recognition',
model = 'openai/whisper-tiny',
device = 0
)
def record_audio():
q = queue.Queue()
fs = 44100
timestamp = time.strftime("%Y%m%d-%H%M%S")
audFile = tempfile.mktemp(prefix=f"audiofile_{timestamp}", suffix='.wav', dir="audios")
def callback(indata, frames, time, status):
nonlocal q
if status:
print(status, file=sys.stderr)
q.put(indata.copy())
try:
with sf.SoundFile(audFile, mode = 'x', samplerate = fs, channels = 1) as file:
with sd.InputStream(samplerate = fs, channels = 1, callback = callback):
try:
while True:
file.write(q.get())
except:
pass
finally:
return audFile
def main():
recording = False
aud_file = None
while True:
with key.Events() as events:
for event in events:
if isinstance(event, key.Events.Press) and event.key == key.Key.space:
if not recording:
print("Recording started...")
aud_file = record_audio()
recording = True
else:
print("Recording stopped.")
recording = False
print(whisper(aud_file))
if __name__ == "__main__":
main()
What I wanted to do was hold down space bar and the audio would record and upon release the audio would be stored locally and then I could pass it to the whisper pipeline, for transcription.
I am using Linux, and unable to utilise pyaudio and keyboard libraries.