I am trying to send data to azure speech SDK to transcribe. I want it to receive data from a python file, put in a buffer and then transcribe continuously. I am using this sample from azure speech SDK.
def speech_recognition_with_pull_stream():
"""gives an example how to use a pull audio stream to recognize speech from a custom audio
source"""
class WavFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
"""Example class that implements the Pull Audio Stream interface to recognize speech from
an audio file"""
def __init__(self, filename: str):
super().__init__()
self._file_h = wave.open(filename, mode=None)
self.sample_width = self._file_h.getsampwidth()
assert self._file_h.getnchannels() == 1
assert self._file_h.getsampwidth() == 2
assert self._file_h.getframerate() == 16000
assert self._file_h.getcomptype() == 'NONE'
def read(self, buffer: memoryview) -> int:
"""read callback function"""
size = buffer.nbytes
frames = self._file_h.readframes(size // self.sample_width)
buffer[:len(frames)] = frames
return len(frames)
def close(self):
"""close callback function"""
self._file_h.close()
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
# specify the audio format
wave_format = speechsdk.audio.AudioStreamFormat(samples_per_second=16000, bits_per_sample=16,
channels=1)
# setup the audio stream
callback = WavFileReaderCallback(weatherfilename)
stream = speechsdk.audio.PullAudioInputStream(callback, wave_format)
audio_config = speechsdk.audio.AudioConfig(stream=stream)
# instantiate the speech recognizer with pull stream input
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
done = False
def stop_cb(evt):
"""callback that signals to stop continuous recognition upon receiving an event `evt`"""
print('CLOSING on {}'.format(evt))
nonlocal done
done = True
# Connect callbacks to the events fired by the speech recognizer
speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
# stop continuous recognition on either session stopped or canceled events
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)
# Start continuous speech recognition
speech_recognizer.start_continuous_recognition()
while not done:
time.sleep(.5)
speech_recognizer.stop_continuous_recognition()
Instead of getting data from an audio file 'callback = WavFileReaderCallback(weatherfilename)', I want to get data from another python file that sends data by:
tcp_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
.
.
.
tcp_client.sendall(bytes(data))
how can I get this data into the buffer for the speech SDK to transcribe. Please guide, Thanks.
I'm Darren from the Speech SDK team. Please have a look at the speech_recognition_with_push_stream Python sample on the SpeechSDK GitHub repo: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/054b4783de9d52f28109c435bf90e073513fec97/samples/python/console/speech_sample.py#L417
I think that's what you are looking for.
Depending on your data availability model, an alternative may be the speech_recognition_with_pull_stream: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/054b4783de9d52f28109c435bf90e073513fec97/samples/python/console/speech_sample.py#L346
Feel free to open a GitHub issue if you need further assistant: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues
Thanks,
Darren