Running pyttsx3 inside a game loop

1.9k views Asked by At

I’m trying to use pyttsx3 in a game where speech is triggered in response to events within the game. However, I can’t figure out how to execute speech commands without pausing the game loop until the audio finishes. Using runAndWait() is no good for this reason, and I can’t get the iterate() example from the docs to work as I need it to either. See the code below:

import pyttsx3
import pygame as pg

def onStart(name):
    """Log that an utterance has begun.

    Registered by ``speak`` for the engine's 'started-utterance' topic.
    ``name`` is supplied by the engine and is not used here.
    """
    print("start")

def onWord(name, location, length):
    """Log a word notification, echoing the values the engine passed in.

    Registered by ``speak`` for the engine's 'started-word' topic. The three
    arguments are printed verbatim after the literal tag ``word``.
    """
    fields = ("word", name, location, length)
    print(*fields)

def onEnd(name, completed):
    """Log that an utterance has finished.

    Registered by ``speak`` for the engine's 'finished-utterance' topic.
    Both arguments are supplied by the engine and are not used here.
    """
    print("end")

# Engine object whose callbacks speak() has already connected (None = not yet).
_connected_engine = None


def speak(*args, **kwargs):
    """Queue an utterance on the module-level ``engine``.

    All positional and keyword arguments are forwarded unchanged to
    ``engine.say``. The three logging callbacks (``onStart``, ``onWord``,
    ``onEnd``) are connected the first time this is called for a given
    engine object, not on every call.
    """
    global _connected_engine
    # engine.connect() adds a new subscription each time it is called, so
    # connecting on every speak() would make each callback fire once per
    # earlier speak() call. Register once per engine instead.
    if _connected_engine is not engine:
        engine.connect('started-utterance', onStart)
        engine.connect('started-word', onWord)
        engine.connect('finished-utterance', onEnd)
        _connected_engine = engine
    engine.say(*args, **kwargs)

def main():
    """Tick once per second forever, speaking at ticks 3 and 6.

    Uses the module-level ``engine`` and ``clock``. The engine is started
    with ``startLoop(False)`` and pumped with ``engine.iterate()`` on every
    tick. The loop has no exit condition, so this function never returns
    normally.
    """
    # Tick number -> sentence to speak on that tick.
    scheduled = {
        3: 'The first utterance.',
        6: 'Another utterance.',
    }

    engine.startLoop(False)
    tick = 0
    while True:
        tick += 1
        print(tick)

        sentence = scheduled.get(tick)
        if sentence is not None:
            speak(sentence)

        engine.iterate()
        clock.tick(1)

if __name__ == '__main__':
    # sys.exit() is called below, but the snippet's import block never
    # imports sys; without this line the call raises NameError.
    import sys

    pg.init()
    # engine and clock are module-level globals read by speak() and main().
    engine = pyttsx3.init()
    clock = pg.time.Clock()
    try:
        main()
    finally:
        # main() loops forever, so the only way out is an exception such as
        # KeyboardInterrupt; make sure pygame is still shut down then.
        pg.quit()
    sys.exit()

In this example, the first statement is triggered at the right time, but it seems like pyttsx3 stops processing at that point: no further say commands produce any sound, and only the first started-utterance event fires; the started-word and finished-utterance events never fire. I’ve tried this every way I can think of and still can’t get it to work. Any ideas?

1

There is 1 answer

1
import random On

I'm not familiar with pyttsx3, but I've created an example that incorporates a pygame event loop. The colour changes with every mouse click event and the name of the colour is spoken.

import random
import pygame
import pyttsx3

# Window size in pixels and the frame-rate cap for the main loop.
WIDTH, HEIGHT = 640, 480
FPS = 120

# Colour names to choose from; each is used both as a pygame colour name
# and as the word that gets spoken.
simple_colors = (
    "red orange yellow green blue "
    "violet purple black white brown"
).split()

# Bring up pygame, the window and the frame clock.
pygame.init()
window = pygame.display.set_mode((WIDTH, HEIGHT))
clock = pygame.time.Clock()

# Text-to-speech: startLoop(False) means the engine is pumped by hand,
# so engine.iterate() is called once per frame in the loop below.
engine = pyttsx3.init()
engine.startLoop(False)

# Start on a random colour; it is painted into a rectangle inset 10 px
# from every edge of the window.
current_color = random.choice(simple_colors)
color_rect = pygame.Rect(10, 10, WIDTH - 20, HEIGHT - 20)
frames = 0
paused = False
running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
            continue
        if event.type != pygame.MOUSEBUTTONUP:
            continue
        # Mouse click: pick a new colour and announce it.
        current_color = random.choice(simple_colors)
        engine.stop()  # meant to cut off speech that is still in progress
        engine.say(f"The next color is {current_color}")

    # The title bar doubles as a status line: colour, frame count, measured FPS.
    caption = f"Color: {current_color:10} Frame: {frames:10} FPS: {clock.get_fps():.1f}"
    pygame.display.set_caption(caption)
    # Grey backdrop first, then the coloured rectangle on top of it.
    window.fill(pygame.color.Color("grey"))
    window.fill(pygame.color.Color(current_color), color_rect)
    pygame.display.update()
    # Let the speech engine do its work for this frame.
    engine.iterate()
    # Cap the frame rate, then count the frame.
    clock.tick(FPS)
    frames += 1
pygame.quit()
engine.endLoop()

If you run this you'll see a repetition of the problem you described, the game loop pausing, indicated by the frame count in the title bar. I used a longer sentence rather than just the colour name to exacerbate the issue. Perhaps someone with a better understanding of pyttsx3 will understand where we've both gone wrong. In any case, we can get around this by running the text-to-speech engine in a different thread.

import random
import threading
import queue
import pygame
import pyttsx3


# Thread for Text to Speech Engine
class TTSThread(threading.Thread):
    """Daemon thread that owns a pyttsx3 engine and speaks queued text.

    Every item taken from the queue is passed to the engine's ``say``,
    except the string ``"exit"``, which stops the thread. The thread starts
    itself from ``__init__``, so constructing an instance is enough.
    """

    # Seconds to wait for new text before pumping the engine again.
    POLL_INTERVAL = 0.01

    def __init__(self, queue):
        """Store ``queue`` (the source of text to speak) and start the thread."""
        super().__init__(daemon=True)
        self.queue = queue
        self.start()

    def run(self):
        """Create the engine in this thread and serve the queue until "exit"."""
        tts_engine = pyttsx3.init()
        tts_engine.startLoop(False)  # loop is pumped by hand via iterate()
        try:
            while True:
                try:
                    # Block briefly rather than polling empty() in a tight
                    # loop, which would spin a CPU core while idle.
                    # `queue` here is the module; only __init__'s parameter
                    # shadows it.
                    data = self.queue.get(timeout=self.POLL_INTERVAL)
                except queue.Empty:
                    # Nothing new to say: let the engine do its work.
                    tts_engine.iterate()
                    continue
                if data == "exit":
                    break
                tts_engine.say(data)
        finally:
            # Close the engine loop even if say()/iterate() raised.
            tts_engine.endLoop()


# Window size in pixels and the frame-rate cap for the main loop.
WIDTH, HEIGHT = 640, 480
FPS = 120

# Colour names to choose from; each is used both as a pygame colour name
# and as the word that gets spoken.
simple_colors = (
    "red orange yellow green blue "
    "violet purple black white brown"
).split()

# Bring up pygame, the window and the frame clock.
pygame.init()
window = pygame.display.set_mode((WIDTH, HEIGHT))
clock = pygame.time.Clock()

# The main thread hands text to the speech thread through this queue.
q = queue.Queue()
tts_thread = TTSThread(q)  # the constructor also starts the thread

# Start on a random colour and announce it straight away.
current_color = random.choice(simple_colors)
q.put(f"The first color is {current_color}")
# The colour is painted into a rectangle inset 10 px from every edge.
color_rect = pygame.Rect(10, 10, WIDTH - 20, HEIGHT - 20)
frames = 0
paused = False
running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            # Leave the loop and tell the speech thread to stop as well.
            running = False
            q.put("exit")
            continue
        if event.type != pygame.MOUSEBUTTONUP:
            continue
        # Mouse click: pick a new colour and queue its announcement.
        current_color = random.choice(simple_colors)
        q.put(f"The next color is {current_color}")

    # The title bar doubles as a status line: colour, frame count, measured FPS.
    caption = f"Color: {current_color:10} Frame: {frames:10} FPS: {clock.get_fps():.1f}"
    pygame.display.set_caption(caption)
    # Grey backdrop first, then the coloured rectangle on top of it.
    window.fill(pygame.color.Color("grey"))
    window.fill(pygame.color.Color(current_color), color_rect)
    pygame.display.update()
    # Cap the frame rate, then count the frame.
    clock.tick(FPS)
    frames += 1
pygame.quit()

In this case, we're using a queue to send the text to the thread which will perform the text-to-speech, so the frame counter does not pause. Unfortunately we can't interrupt the speech, but that might not be a problem for your application.

Let me know if you need any further explanation of what's going on, I've tried to keep it simple.

EDIT: Looking at this issue recorded on GitHub, it seems that speech interruption on Windows is not working. The workaround in that discussion is to run the text-to-speech engine in a separate process, which is terminated to interrupt the speech. I think that might incur an unacceptable start-up cost for process and engine re-initialisation every time an interruption is desired, but it may work for your usage.