Is there a way to pause and resume speech recognition in webkitSpeechRecognition when audio is played by the computer? The computer right now seems to be confusing what is user input via the microphone and audio output from a wav file.
Right now I have created the below:
var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; 
var recognition = new webkitSpeechRecognition();
    
window.addEventListener('DOMContentLoaded', function() {
    document.getElementById("speak_button").addEventListener('click', function() {
       recognition.start();
       setInterval(updateCountDown,1000); /* countdown timer starts 1 second after 
                                           being clicked */
       updateCountDown(); //this is a function that counts down from 2 minutes to 0
        
    });
});
var transcript; // transcript variable will store what the user says to the computer
recognition.addEventListener('result', e => {
    transcript = Array.from(e.results)
       .map(result => result[0])
       .map(result => result.transcript)
       .join('');
   console.log(transcript);
    communicateToUser();
  
});
function communicateToUser() {
    var audio_age = new Audio("age_20.wav");
        var age_regular_expression = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;
    // if regular expression matches all words, then function will be performed
        if (age_regular_expression.test(transcript)) {
        recognition.stop(); /* wanting the speech recognition to stop here so that it 
                            doesn't capture the contents of audio_age */
        audio_age.play(); // audio will play "I am 20 years old" 
        recognition.start(); /* wanting the speech recognition to start again 
                            after audio_age is played */
        
    }
}
The problem is that the recognition.stop() function isn't working, which means that the microphone will continue capturing the contents of audio_age.wav and will convert it to text. So, when I want to speak to the computer again and ask it a question, the transcript that will be analysed will include the transcript from when I just spoke before.
Any advice would be appreciated.
I was thinking of a solution but I'm not sure how to implement it:
SOLUTION:
stop the recognition function and delay it by the same number of seconds that the audio file plays for (for example 5 seconds), and then the recognition function can be started again after those 5 seconds?
Thanks!
EDIT FOR CESARE:
// SPEECH RECOGNITION SET UP 
    var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition; 
    var recognition = new webkitSpeechRecognition();
        
    window.addEventListener('DOMContentLoaded', function() {
    document.getElementById("speak_button").addEventListener('click', function() {
            recognition.start();
            setInterval(updateCountDown,1000);
            updateCountDown();
        });
    });
// ALL OF THE AUDIO FILES --> WILL BE PLAYED IF REGEX MATCHES TRUE
    
    const audio_name = new Audio("name_harry.wav");
    
    const audio_age = new Audio("age_20.wav");
    
    const audio_date_of_birth = new Audio("15_nov_1999.wav");
    
    const audio_occupation = new Audio("grocery_store.wav");
// ON SPEECH START --> IF MICROPHONE INPUT IS DETECTED, THEN SPEECH RECOGNITION STARTS 
    
    recognition.onspeechstart = () => {
        console.log("SPEECH STARTED");
        if (!audio_age.paused) {audio_age.pause()}
        else if (!audio_name.paused) {audio_name.pause()}
        else if (!audio_date_of_birth.paused) {audio_date_of_birth.pause()}
        else if (!audio_occupation.paused) {audio_occupation.pause()}
  
    };
    
// ON SPEECH END --> WHEN MICROPHONE INPUT STOPS, SPEECH RECOGNITION SHOULD END 
    recognition.onspeechend = () => {
        console.log("SPEECH ENDED");
        recognition.stop();
    
    };
    
// I have included this because I want the computer to continue listening to the user, but only after the audio is finished playing 
    recognition.addEventListener('end', recognition.start);
// After audio is ended, speech recognition will start again
    
    audio_name.addEventListener('ended', recognition.start);
    audio_age.addEventListener('ended', recognition.start);
    audio_date_of_birth.addEventListener('ended', recognition.start);
    audio_occupation.addEventListener('ended', recognition.start);
    audio_height.addEventListener('ended', recognition.start);
    
    
// USED TO OBTAIN THE USER TRANSCRIPT/ACTUAL SPEECH CONTENT
    var transcript;
    
    recognition.addEventListener('result', e => {
        transcript = Array.from(e.results)
           .map((result) => result[0])
           .map((result) => result.transcript)
           .join('');
       console.log(transcript);
       communicateToUser();
      
    });
    
 
     // ALL OF THE REGULAR EXPRESSIONS
    const name_regex = /what is your name|(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bcan\b)(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\blet\b)(?=.*\bknow\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bshare\b)(?=.*\bme\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bfirst\b)(?=.*\band\b)(?=.*\blast\b)(?=.*\bname\b)/ig;
const age_regex = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;
const date_of_birth_regex = /(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhat\b)(?=.*\bdate\b)(?=.*\byou\b)(?=.*\bborn\b)/gi
const patient_occupation = /do you have a job|(?=.*\bdo\b)(?=.*\byou\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bhave\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\bwhere\b)|(?=.*\banything\b)(?=.*\bfor\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\banywhere\b)|(?=.*\bwhat\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\boccupation\b)|(?=.*\byou\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bjob\b)|(?=.*\bjob\b)/ig;
// COMMUNICATE BACK TO USER FUNCTION
 
       function communicateToUser() {
    
    if (name_regex.test(transcript)) {
            audio_name.play();
    }
    if (age_regex.test(transcript)) {
            audio_age.play();
    }
    if (date_of_birth_regex.test(transcript)) {
                audio_date_of_birth.play();
    }
    if (occuptation_regex.test(transcript)) {
                    audio_occupation.play();
        }
    
    }
         
UpdateCountdown function
function updateCountDown() {
   
   const minutes = Math.floor(time / 60);
   let seconds = time % 60;
   seconds = seconds < 2 ? '0' + seconds : seconds;
   document.getElementById("countdown").innerHTML = `${minutes}:${seconds}`;
   time--;
   time = time < 0 ? 0 : time; 
    if (minutes == 0 && seconds == 0) {
        document.getElementById('tableStyle').style.display = "block";
        recognition.stop(); //ADDING IN RECOGNITION.STOP ONCE MINUTES AND SECONDS == 0!
        
    }
   };
 
                        
EDIT:
I made a working example, https://stackblitz.com/edit/web-platform-ppcuh9?file=index.html: