How to pause speech recognition (JS SpeechRecognition) before audio is played by the computer and then resume speech recognition after being played?

Question

How to pause speech recognition (JS SpeechRecognition) before audio is played by the computer and then resume speech recognition after being played?

1.4k views Asked by sperfume At 17 April 2022 at 05:28

Is there a way to pause and resume speech recognition in webkitSpeechRecognition when audio is played by the computer? The computer right now seems to be confusing what is user input via the microphone and audio output from a wav file.

Right now I have created the below:

var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; 
var recognition = new webkitSpeechRecognition();
    
window.addEventListener('DOMContentLoaded', function() {
    document.getElementById("speak_button").addEventListener('click', function() {
       recognition.start();
       setInterval(updateCountDown,1000); /* countdown timer starts 1 second after 
                                           being clicked */
       updateCountDown(); //this is a function that counts down from 2 minutes to 0
        
    });
});


var transcript; // transcript variable will store what the user says to the computer

recognition.addEventListener('result', e => {
    transcript = Array.from(e.results)
       .map(result => result[0])
       .map(result => result.transcript)
       .join('');
   console.log(transcript);
    communicateToUser();
  
});


function communicateToUser() {

    var audio_age = new Audio("age_20.wav");

        var age_regular_expression = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;

    // if regular expression matches all words, then function will be performed

        if (age_regular_expression.test(transcript)) {
        recognition.stop(); /* wanting the speech recognition to stop here so that it 
                            doesn't capture the contents of audio_age */
        audio_age.play(); // audio will play "I am 20 years old" 
        recognition.start(); /* wanting the speech recognition to start again 
                            after audio_age is played */
        
    }

}

The problem is that the recognition.stop() function isn't working, which means that the microphone will continue capturing the contents of audio_age.wav and will convert it to text. So, when I want to speak to the computer again and ask it a question, the transcript that will be analysed will include the transcript from when I just spoke before.

Any advice would be appreciated.

I was thinking of a solution but I'm not sure how to implement it:
SOLUTION: stop the recognition function and delay it by the same number of seconds that the audio file plays for (for example 5 seconds), and then the recognition function can be started again after those 5 seconds?

Thanks!

EDIT FOR CESARE:

// SPEECH RECOGNITION SET UP 

    var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; 
    var recognition = new webkitSpeechRecognition();
        

    window.addEventListener('DOMContentLoaded', function() {
    document.getElementById("speak_button").addEventListener('click', function() {
            recognition.start();
            setInterval(updateCountDown,1000);
            updateCountDown();
        });
    });

// ALL OF THE AUDIO FILES --> WILL BE PLAYED IF REGEX MATCHES TRUE
    
    const audio_name = new Audio("name_harry.wav");
    
    const audio_age = new Audio("age_20.wav");
    
    const audio_date_of_birth = new Audio("15_nov_1999.wav");
    
    const audio_occupation = new Audio("grocery_store.wav");


// ON SPEECH START --> IF MICROPHONE INPUT IS DETECTED, THEN SPEECH RECOGNITION STARTS 
    
    recognition.onspeechstart = () => {
        console.log("SPEECH STARTED");
        if (!audio_age.paused) {audio_age.pause()}
        else if (!audio_name.paused) {audio_name.pause()}
        else if (!audio_date_of_birth.paused) {audio_date_of_birth.pause()}
        else if (!audio_occupation.paused) {audio_occupation.pause()}
  
    };
    
// ON SPEECH END --> WHEN MICROPHONE INPUT STOPS, SPEECH RECOGNITION SHOULD END 

    recognition.onspeechend = () => {
        console.log("SPEECH ENDED");
        recognition.stop();
    
    };
    
// I have included this because I want the computer to continue listening to the user, but only after the audio is finished playing 

    recognition.addEventListener('end', recognition.start);

// After audio is ended, speech recognition will start again
    
    audio_name.addEventListener('ended', recognition.start);
    audio_age.addEventListener('ended', recognition.start);
    audio_date_of_birth.addEventListener('ended', recognition.start);
    audio_occupation.addEventListener('ended', recognition.start);
    audio_height.addEventListener('ended', recognition.start);
    
    
// USED TO OBTAIN THE USER TRANSCRIPT/ACTUAL SPEECH CONTENT

    var transcript;
    
    recognition.addEventListener('result', e => {
        transcript = Array.from(e.results)
           .map((result) => result[0])
           .map((result) => result.transcript)
           .join('');
       console.log(transcript);
       communicateToUser();
      
    });
    
 




     // ALL OF THE REGULAR EXPRESSIONS

    const name_regex = /what is your name|(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bcan\b)(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\blet\b)(?=.*\bknow\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bshare\b)(?=.*\bme\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bfirst\b)(?=.*\band\b)(?=.*\blast\b)(?=.*\bname\b)/ig;

const age_regex = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;

const date_of_birth_regex = /(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhat\b)(?=.*\bdate\b)(?=.*\byou\b)(?=.*\bborn\b)/gi

const patient_occupation = /do you have a job|(?=.*\bdo\b)(?=.*\byou\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bhave\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\bwhere\b)|(?=.*\banything\b)(?=.*\bfor\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\banywhere\b)|(?=.*\bwhat\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\boccupation\b)|(?=.*\byou\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bjob\b)|(?=.*\bjob\b)/ig;

// COMMUNICATE BACK TO USER FUNCTION
 

       function communicateToUser() {
    
    if (name_regex.test(transcript)) {
            audio_name.play();
    }

    if (age_regex.test(transcript)) {
            audio_age.play();
    }
    if (date_of_birth_regex.test(transcript)) {
                audio_date_of_birth.play();
    }
    if (occuptation_regex.test(transcript)) {
                    audio_occupation.play();
        }
    
    }

UpdateCountdown function

function updateCountDown() {
   
   const minutes = Math.floor(time / 60);
   let seconds = time % 60;

   seconds = seconds < 2 ? '0' + seconds : seconds;

   document.getElementById("countdown").innerHTML = `${minutes}:${seconds}`;

   time--;

   time = time < 0 ? 0 : time; 

    if (minutes == 0 && seconds == 0) {
        document.getElementById('tableStyle').style.display = "block";
        recognition.stop(); //ADDING IN RECOGNITION.STOP ONCE MINUTES AND SECONDS == 0!
        
    }

   };

Original Q&A

There are 1 answers

**Cesare Polonara** · Accepted Answer · 2022-04-17T06:14:30+00:00

EDIT:

I made a working example, https://stackblitz.com/edit/web-platform-ppcuh9?file=index.html:

let isListening = false; // use this flag to toggle the recognition
let interval;
const button = document.getElementById('speak_button');

const speaker = new MakeSpeechSynth({
  pitch: 0.5,
  rate: 0.8,
  language: 'en-US',
});

const SpeechRecognition =
  window.SpeechRecognition || window.webkitSpeechRecognition;
const recognition = new SpeechRecognition();

button.addEventListener('click', function() {
  if (isListening) {
    console.log('ABORTING RECOGNITION');
    isListening = false;
    recognition.abort();
    clearInterval(interval);
    button.innerText = 'Click Me To Speak';
  } else {
    console.log('STARTING RECOGNITION');
    recognition.start();
    interval = setInterval(updateCountDown, 1000);
    updateCountDown();
    button.innerText = 'Stop Recognition';
    isListening = true;
  }
});

recognition.onaudiostart = () => {
  console.log('RECOGNITION STARTED');
};

recognition.onaudioend = () => {
  console.log('RECOGNITION FINISHED');
};

recognition.onend = () => {
  console.log('RECOGNITION DISCONNECTED');
  if (isListening) recognition.start();
};

recognition.onspeechstart = () => {
  console.log('SPEECH STARTED');
  // You can stop the bot speaking if you want when you speak over him:
  // Comment if you want him to keep speaking

  //Object.values(data).forEach((d) => d.audio.pause());
  if (speaker.isSpeaking) speaker.cancel();
};

recognition.onspeechend = () => {
  console.log('SPEECH ENDED');
};

recognition.addEventListener('result', (e) => {
  const transcript = Array.from(e.results)
    .map((result) => result[0])
    .map((result) => result.transcript)
    .join('');
  console.log(transcript);
  speakBackToMe(transcript);
});

function speakBackToMe(str) {
  Object.values(data).forEach((d) => {
    if (d.regex.test(str)) {
      // d.audio.play();
      speaker.speak(d.message);
      console.log(d.message);
    }
  });
}

// UPDATE COUNTDOWN
const startingMinutes = 2;
let time = startingMinutes * 60;

function updateCountDown() {
  const minutes = Math.floor(time / 60);
  let seconds = time % 60;
  seconds = seconds < 2 ? '0' + seconds : seconds;
  document.getElementById('countdown').innerHTML = `${minutes}:${seconds}`;
  time--;
  time = time < 0 ? 0 : time;

  if (minutes == 0 && seconds == 0) {
    document.getElementById('tableStyle').style.display = 'table-cell';
  }
}

<div id="app"></div>
<button id="speak_button">Click Me to Speak</button>
<p id="countdown"></p>

TechQA.

How to pause speech recognition (JS SpeechRecognition) before audio is played by the computer and then resume speech recognition after being played?

There are 1 answers

Related Questions in JAVASCRIPT

Related Questions in AUDIO

Related Questions in WEBKIT

Related Questions in WEBKITSPEECHRECOGNITION

Popular Questions

Popular Tags

Trending Questions