Record an audio stream with WASAPI

6.5k views Asked by At

I'm reading documentation such as Capturing a stream or Loopback recording, but I can't find a good reproducible example (with includes, build instructions, etc.) about how to record chunks from a Loopback audio device (sometimes called "What you hear", "Stereo Mix") with Windows WASAPI.

Would you have a simple reproducible example showing how to record audio chunks in a loop from a WASAPI device, in C++?

Here is a similar (working) example in Python:

import soundcard as sc  # installed with: pip install soundcard

# Take the first device returned when loopback devices are included in the list.
loopback_device = sc.all_microphones(include_loopback=True)[0]

# Keep the recorder open and print every chunk it hands back, forever.
with loopback_device.recorder(samplerate=44100) as recorder:
    while True:
        chunk = recorder.record(numframes=None)
        print(chunk)    # chunks of audio data (448 samples x 2 channels as an array by default)
1

There is 1 answer

8
Rita Han On BEST ANSWER

This is an example of loopback-mode audio capturing.

Starting from the code in the documentation page "Capturing a stream", make the changes described in "Loopback recording", as follows:

// In the call to the IMMDeviceEnumerator::GetDefaultAudioEndpoint method, change the first parameter (dataFlow) from eCapture to eRender.
hr = pEnumerator->GetDefaultAudioEndpoint(
    eRender, eConsole, &pDevice);

...

// In the call to the IAudioClient::Initialize method, change the value of the second parameter (StreamFlags) from 0 to AUDCLNT_STREAMFLAGS_LOOPBACK.
hr = pAudioClient->Initialize(
    AUDCLNT_SHAREMODE_SHARED,
    AUDCLNT_STREAMFLAGS_LOOPBACK,
    hnsRequestedDuration,
    0,
    pwfx,
    NULL);

The documentation leaves out CopyData() and the file-writing functions (WriteWaveHeader() and FinishWaveFile()). Example implementations of these functions are shown below. Refer to the blog post "Sample - WASAPI loopback capture (record what you hear)" for more detailed information.

// Appends one captured packet to the WAVE file and decides when to stop recording.
//
// pData     - captured frames, or NULL to request silence (the capture loop passes
//             NULL when the packet carries AUDCLNT_BUFFERFLAGS_SILENT).
// NumFrames - number of audio frames in the packet; 0 is rejected.
// pDone     - set to TRUE once clock() exceeds 10 * CLOCKS_PER_SEC.
// pwfx      - stream format; nBlockAlign is used as the byte size of one frame.
// hFile     - multimedia file handle the bytes are written to.
//
// Returns S_OK on success, E_UNEXPECTED for an empty packet or a short/failed write.
HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 NumFrames, BOOL* pDone, WAVEFORMATEX* pwfx, HMMIO hFile)
{
    if (0 == NumFrames) {
        wprintf(L"IAudioCaptureClient::GetBuffer said to read 0 frames\n");
        return E_UNEXPECTED;
    }

    const LONG lBytesToWrite = static_cast<LONG>(NumFrames * pwfx->nBlockAlign);
    LONG lBytesWritten = 0;

    if (NULL != pData) {
#pragma prefast(suppress: __WARNING_INCORRECT_ANNOTATION, "IAudioCaptureClient::GetBuffer SAL annotation implies a 1-byte buffer")
        lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(pData), lBytesToWrite);
    } else {
        // A NULL buffer means "silence": mmioWrite cannot read from NULL, so emit
        // zero bytes instead (zero is silence for float and signed-integer PCM;
        // NOTE(review): 8-bit unsigned PCM would need 0x80 - confirm if relevant).
        static const char kSilence[4096] = { 0 };
        const LONG lSilenceBytes = static_cast<LONG>(sizeof(kSilence));
        while (lBytesWritten < lBytesToWrite) {
            const LONG lRemaining = lBytesToWrite - lBytesWritten;
            const LONG lChunk = (lRemaining < lSilenceBytes) ? lRemaining : lSilenceBytes;
            const LONG lResult = mmioWrite(hFile, kSilence, lChunk);
            if (lResult != lChunk) {
                // Short or failed write: keep what was written and let the check below report it.
                if (lResult > 0) {
                    lBytesWritten += lResult;
                }
                break;
            }
            lBytesWritten += lChunk;
        }
    }

    if (lBytesToWrite != lBytesWritten) {
        // LONG is signed, so print with %ld.
        wprintf(L"mmioWrite wrote %ld bytes : expected %ld bytes", lBytesWritten, lBytesToWrite);
        return E_UNEXPECTED;
    }

    static int CallCount = 0;
    std::cout << "CallCount = " << CallCount++ << ", NumFrames: " << NumFrames << '\n';

    if (clock() > 10 * CLOCKS_PER_SEC) //Record 10 seconds. From the first time call clock() at the beginning of the main().
        *pDone = TRUE;

    return S_OK;
}

// Writes the opening part of a WAVE file: a RIFF/WAVE chunk containing a 'fmt '
// chunk (the WAVEFORMATEX plus its cbSize extra bytes), a 'fact' chunk holding a
// zero DWORD, and an open 'data' chunk.
//
// hFile   - multimedia file handle to write to.
// pwfx    - format written into the 'fmt ' chunk.
// pckRIFF - receives the RIFF/WAVE chunk info; left open (not ascended) on return.
// pckData - receives the 'data' chunk info; left open so audio bytes can follow.
//
// Returns S_OK on success, E_FAIL (after logging) if any mmio call fails.
HRESULT WriteWaveHeader(HMMIO hFile, LPCWAVEFORMATEX pwfx, MMCKINFO* pckRIFF, MMCKINFO* pckData) {
    MMRESULT result;

    // make a RIFF/WAVE chunk
    pckRIFF->ckid = MAKEFOURCC('R', 'I', 'F', 'F');
    pckRIFF->fccType = MAKEFOURCC('W', 'A', 'V', 'E');

    result = mmioCreateChunk(hFile, pckRIFF, MMIO_CREATERIFF);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"RIFF/WAVE\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fmt ' chunk (within the RIFF/WAVE chunk)
    // zero-initialized so no field is handed to mmioCreateChunk with an indeterminate value
    MMCKINFO chunk = { 0 };
    chunk.ckid = MAKEFOURCC('f', 'm', 't', ' ');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"fmt \") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write the WAVEFORMATEX data to it (base structure plus cbSize extra bytes)
    const LONG lBytesInWfx = static_cast<LONG>(sizeof(WAVEFORMATEX) + pwfx->cbSize);
    LONG lBytesWritten =
        mmioWrite(
            hFile,
            reinterpret_cast<PCHAR>(const_cast<LPWAVEFORMATEX>(pwfx)),
            lBytesInWfx
        );
    if (lBytesWritten != lBytesInWfx) {
        // LONG is signed (mmioWrite reports failure as a negative value), so use %ld
        wprintf(L"mmioWrite(fmt data) wrote %ld bytes; expected %ld bytes", lBytesWritten, lBytesInWfx);
        return E_FAIL;
    }

    // ascend from the 'fmt ' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fmt \" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fact' chunk whose data is (DWORD)0
    chunk.ckid = MAKEFOURCC('f', 'a', 'c', 't');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"fact\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write (DWORD)0 to it as a placeholder frame count
    DWORD frames = 0;
    const LONG lFactBytes = static_cast<LONG>(sizeof(frames));
    lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(&frames), lFactBytes);
    if (lBytesWritten != lFactBytes) {
        wprintf(L"mmioWrite(fact data) wrote %ld bytes; expected %u bytes", lBytesWritten, (UINT32)sizeof(frames));
        return E_FAIL;
    }

    // ascend from the 'fact' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fact\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'data' chunk and leave the data pointer there
    pckData->ckid = MAKEFOURCC('d', 'a', 't', 'a');
    result = mmioCreateChunk(hFile, pckData, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"data\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    return S_OK;
}

// Closes the two chunks that the header-writing step left open: first the
// 'data' chunk, then the enclosing RIFF/WAVE chunk.
//
// hFile   - multimedia file handle being written.
// pckRIFF - chunk info of the RIFF/WAVE chunk.
// pckData - chunk info of the 'data' chunk.
//
// Returns S_OK when both ascends succeed, otherwise logs and returns E_FAIL.
HRESULT FinishWaveFile(HMMIO hFile, MMCKINFO* pckRIFF, MMCKINFO* pckData) {
    // Innermost chunk first.
    const MMRESULT dataStatus = mmioAscend(hFile, pckData, 0);
    if (dataStatus != MMSYSERR_NOERROR) {
        wprintf(L"mmioAscend(\"data\" failed: MMRESULT = 0x%08x", dataStatus);
        return E_FAIL;
    }

    // Then the outer RIFF/WAVE chunk.
    const MMRESULT riffStatus = mmioAscend(hFile, pckRIFF, 0);
    if (riffStatus != MMSYSERR_NOERROR) {
        wprintf(L"mmioAscend(\"RIFF/WAVE\" failed: MMRESULT = 0x%08x", riffStatus);
        return E_FAIL;
    }

    return S_OK;
}

Call WriteWaveHeader before pAudioClient->Start(). Call FinishWaveFile after pAudioClient->Stop().

As a result, the program records about 10 seconds of the audio playing on your Windows machine.

UPDATE #1:

#include <Windows.h>
#include <mmsystem.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <time.h>
#include <iostream>

int main()
{
    clock();

    HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);

    // Create file
    MMIOINFO mi = { 0 };
    hFile = mmioOpen(
        // some flags cause mmioOpen write to this buffer
        // but not any that we're using
        (LPWSTR)fileName,
        &mi,
        MMIO_WRITE | MMIO_CREATE
    );

    if (NULL == hFile) {
        wprintf(L"mmioOpen(\"%ls\", ...) failed. wErrorRet == %u", fileName, GetLastError());
        return E_FAIL;
    }

    MyAudioSink AudioSink;
    RecordAudioStream(&AudioSink);

    mmioClose(hFile, 0);

    CoUninitialize();
    return 0;
}

Compile command:

cl -DUNICODE loopbackCapture.cpp /link winmm.lib user32.lib Kernel32.lib Ole32.lib

UPDATE #2:

    #include <Windows.h>
    #include <mmsystem.h>
    #include <mmdeviceapi.h>
    #include <audioclient.h>
    #include <time.h>
    #include <iostream>
    
    using namespace std;
    
    #pragma comment(lib, "Winmm.lib")
    
    WCHAR fileName[] = L"loopback-capture.wav";
    BOOL bDone = FALSE;
    HMMIO hFile = NULL;
    
    // REFERENCE_TIME time units per second and per millisecond
    #define REFTIMES_PER_SEC  10000000
    #define REFTIMES_PER_MILLISEC  10000
    
    #define EXIT_ON_ERROR(hres)  \
                  if (FAILED(hres)) { goto Exit; }
    #define SAFE_RELEASE(punk)  \
                  if ((punk) != NULL)  \
                    { (punk)->Release(); (punk) = NULL; }
    
    const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
    const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
    const IID IID_IAudioClient = __uuidof(IAudioClient);
    const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
    
    // Destination for captured audio: RecordAudioStream hands every packet it
    // reads from the capture client to CopyData.
    class MyAudioSink
    {
    public:
        // Consumes NumFrames frames starting at pData (format described by pwfx),
        // writes them to hFile, and sets *pDone when recording should stop.
        HRESULT CopyData(
            BYTE* pData,
            UINT32 NumFrames,
            BOOL* pDone,
            WAVEFORMATEX* pwfx,
            HMMIO hFile);
    };
    
    HRESULT WriteWaveHeader(HMMIO hFile, LPCWAVEFORMATEX pwfx, MMCKINFO* pckRIFF, MMCKINFO* pckData);
    HRESULT FinishWaveFile(HMMIO hFile, MMCKINFO* pckRIFF, MMCKINFO* pckData);
    HRESULT RecordAudioStream(MyAudioSink* pMySink);
    
    int main()
    {
        clock();
    
        HRESULT hr = CoInitializeEx(NULL, COINIT_MULTITHREADED);
    
        // Create file
        MMIOINFO mi = { 0 };
        hFile = mmioOpen(
            // some flags cause mmioOpen write to this buffer
            // but not any that we're using
            (LPWSTR)fileName,
            &mi,
            MMIO_WRITE | MMIO_CREATE
        );
    
        if (NULL == hFile) {
            wprintf(L"mmioOpen(\"%ls\", ...) failed. wErrorRet == %u", fileName, GetLastError());
            return E_FAIL;
        }
    
        MyAudioSink AudioSink;
        RecordAudioStream(&AudioSink);
    
        mmioClose(hFile, 0);
    
        CoUninitialize();
        return 0;
    }
    
    
    // Appends one captured packet to the WAVE file and decides when to stop recording.
    //
    // pData     - captured frames, or NULL to request silence (RecordAudioStream passes
    //             NULL when the packet carries AUDCLNT_BUFFERFLAGS_SILENT).
    // NumFrames - number of audio frames in the packet; 0 is rejected.
    // pDone     - set to TRUE once clock() exceeds 10 * CLOCKS_PER_SEC.
    // pwfx      - stream format; nBlockAlign is used as the byte size of one frame.
    // hFile     - multimedia file handle the bytes are written to.
    //
    // Returns S_OK on success, E_UNEXPECTED for an empty packet or a short/failed write.
    HRESULT MyAudioSink::CopyData(BYTE* pData, UINT32 NumFrames, BOOL* pDone, WAVEFORMATEX* pwfx, HMMIO hFile)
    {
        if (0 == NumFrames) {
            wprintf(L"IAudioCaptureClient::GetBuffer said to read 0 frames\n");
            return E_UNEXPECTED;
        }

        const LONG lBytesToWrite = static_cast<LONG>(NumFrames * pwfx->nBlockAlign);
        LONG lBytesWritten = 0;

        if (NULL != pData) {
    #pragma prefast(suppress: __WARNING_INCORRECT_ANNOTATION, "IAudioCaptureClient::GetBuffer SAL annotation implies a 1-byte buffer")
            lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(pData), lBytesToWrite);
        } else {
            // A NULL buffer means "silence": mmioWrite cannot read from NULL, so emit
            // zero bytes instead (zero is silence for float and signed-integer PCM;
            // NOTE(review): 8-bit unsigned PCM would need 0x80 - confirm if relevant).
            static const char kSilence[4096] = { 0 };
            const LONG lSilenceBytes = static_cast<LONG>(sizeof(kSilence));
            while (lBytesWritten < lBytesToWrite) {
                const LONG lRemaining = lBytesToWrite - lBytesWritten;
                const LONG lChunk = (lRemaining < lSilenceBytes) ? lRemaining : lSilenceBytes;
                const LONG lResult = mmioWrite(hFile, kSilence, lChunk);
                if (lResult != lChunk) {
                    // Short or failed write: keep what was written and let the check below report it.
                    if (lResult > 0) {
                        lBytesWritten += lResult;
                    }
                    break;
                }
                lBytesWritten += lChunk;
            }
        }

        if (lBytesToWrite != lBytesWritten) {
            // LONG is signed, so print with %ld.
            wprintf(L"mmioWrite wrote %ld bytes : expected %ld bytes", lBytesWritten, lBytesToWrite);
            return E_UNEXPECTED;
        }

        static int CallCount = 0;
        std::cout << "CallCount = " << CallCount++ << ", NumFrames: " << NumFrames << '\n';

        if (clock() > 10 * CLOCKS_PER_SEC) //Record 10 seconds. From the first time call clock() at the beginning of the main().
            *pDone = TRUE;

        return S_OK;
    }
    
    // Captures the default render endpoint in loopback mode and feeds every packet
    // to pMySink->CopyData until the global bDone flag becomes TRUE.
    //
    // Sequence: create the device enumerator, get the default eRender/eConsole
    // endpoint, activate an IAudioClient, initialize it in shared mode with
    // AUDCLNT_STREAMFLAGS_LOOPBACK using the mix format, write the WAVE header to
    // the global hFile, start the stream, poll for packets, stop the stream and
    // close the WAVE chunks.
    //
    // pMySink - sink whose CopyData receives each packet (pData is NULL for packets
    //           flagged AUDCLNT_BUFFERFLAGS_SILENT).
    //
    // Returns the first failing HRESULT, or the result of the last step on success.
    // Every exit path goes through the Exit label so the mix format and the COM
    // interfaces are always released.
    //
    // Note: bDone is only changed inside CopyData, and CopyData is only called when
    // a packet is available, so the loop keeps polling while no packets arrive.
    HRESULT RecordAudioStream(MyAudioSink* pMySink)
    {
        // All locals are declared before the first EXIT_ON_ERROR so that no
        // "goto Exit" jumps over an initialization.
        HRESULT hr = S_OK;
        REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
        REFERENCE_TIME hnsActualDuration = 0;
        DWORD sleepMilliseconds = 0;
        UINT32 bufferFrameCount = 0;
        UINT32 numFramesAvailable = 0;
        IMMDeviceEnumerator* pEnumerator = NULL;
        IMMDevice* pDevice = NULL;
        IAudioClient* pAudioClient = NULL;
        IAudioCaptureClient* pCaptureClient = NULL;
        WAVEFORMATEX* pwfx = NULL;
        UINT32 packetLength = 0;

        BYTE* pData = NULL;
        DWORD flags = 0;

        MMCKINFO ckRIFF = { 0 };
        MMCKINFO ckData = { 0 };

        hr = CoCreateInstance(
            CLSID_MMDeviceEnumerator, NULL,
            CLSCTX_ALL, IID_IMMDeviceEnumerator,
            (void**)& pEnumerator);
        EXIT_ON_ERROR(hr)

        // Loopback capture opens the *render* endpoint, not a capture endpoint.
        hr = pEnumerator->GetDefaultAudioEndpoint(
            eRender, eConsole, &pDevice);
        EXIT_ON_ERROR(hr)

        hr = pDevice->Activate(
            IID_IAudioClient, CLSCTX_ALL,
            NULL, (void**)& pAudioClient);
        EXIT_ON_ERROR(hr)

        hr = pAudioClient->GetMixFormat(&pwfx);
        EXIT_ON_ERROR(hr)

        hr = pAudioClient->Initialize(
            AUDCLNT_SHAREMODE_SHARED,
            AUDCLNT_STREAMFLAGS_LOOPBACK,
            hnsRequestedDuration,
            0,
            pwfx,
            NULL);
        EXIT_ON_ERROR(hr)

        // Get the size of the allocated buffer.
        hr = pAudioClient->GetBufferSize(&bufferFrameCount);
        EXIT_ON_ERROR(hr)

        hr = pAudioClient->GetService(
            IID_IAudioCaptureClient,
            (void**)& pCaptureClient);
        EXIT_ON_ERROR(hr)

        // WriteWaveHeader does its own logging; go through Exit so nothing leaks.
        hr = WriteWaveHeader(hFile, pwfx, &ckRIFF, &ckData);
        EXIT_ON_ERROR(hr)

        // Calculate the actual duration of the allocated buffer.
        hnsActualDuration = static_cast<REFERENCE_TIME>(
            (double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec);

        // Poll interval: half the buffer duration, in milliseconds.
        sleepMilliseconds = static_cast<DWORD>(
            hnsActualDuration / REFTIMES_PER_MILLISEC / 2);

        hr = pAudioClient->Start();  // Start recording.
        EXIT_ON_ERROR(hr)

        // Each loop fills about half of the shared buffer.
        while (bDone == FALSE)
        {
            // Sleep for half the buffer duration.
            Sleep(sleepMilliseconds);

            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            EXIT_ON_ERROR(hr)

            while (packetLength != 0)
            {
                // Get the available data in the shared buffer.
                hr = pCaptureClient->GetBuffer(
                    &pData,
                    &numFramesAvailable,
                    &flags, NULL, NULL);
                EXIT_ON_ERROR(hr)

                if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
                {
                    pData = NULL;  // Tell CopyData to write silence.
                }

                // Copy the available capture data to the audio sink.
                hr = pMySink->CopyData(
                    pData, numFramesAvailable, &bDone, pwfx, hFile);

                // Release the packet even when CopyData failed, so every
                // successful GetBuffer is paired with a ReleaseBuffer.
                HRESULT hrRelease = pCaptureClient->ReleaseBuffer(numFramesAvailable);
                EXIT_ON_ERROR(hr)

                hr = hrRelease;
                EXIT_ON_ERROR(hr)

                hr = pCaptureClient->GetNextPacketSize(&packetLength);
                EXIT_ON_ERROR(hr)
            }
        }

        hr = pAudioClient->Stop();  // Stop recording.
        EXIT_ON_ERROR(hr)

        // FinishWaveFile takes (hFile, pckRIFF, pckData) and does its own logging;
        // on failure, fall through to Exit with hr set.
        hr = FinishWaveFile(hFile, &ckRIFF, &ckData);

    Exit:
        CoTaskMemFree(pwfx);
        SAFE_RELEASE(pEnumerator)
        SAFE_RELEASE(pDevice)
        SAFE_RELEASE(pAudioClient)
        SAFE_RELEASE(pCaptureClient)

        return hr;
    }

// Writes the opening part of a WAVE file: a RIFF/WAVE chunk containing a 'fmt '
// chunk (the WAVEFORMATEX plus its cbSize extra bytes), a 'fact' chunk holding a
// zero DWORD, and an open 'data' chunk.
//
// hFile   - multimedia file handle to write to.
// pwfx    - format written into the 'fmt ' chunk.
// pckRIFF - receives the RIFF/WAVE chunk info; left open (not ascended) on return.
// pckData - receives the 'data' chunk info; left open so audio bytes can follow.
//
// Returns S_OK on success, E_FAIL (after logging) if any mmio call fails.
HRESULT WriteWaveHeader(HMMIO hFile, LPCWAVEFORMATEX pwfx, MMCKINFO* pckRIFF, MMCKINFO* pckData) {
    MMRESULT result;

    // make a RIFF/WAVE chunk
    pckRIFF->ckid = MAKEFOURCC('R', 'I', 'F', 'F');
    pckRIFF->fccType = MAKEFOURCC('W', 'A', 'V', 'E');

    result = mmioCreateChunk(hFile, pckRIFF, MMIO_CREATERIFF);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"RIFF/WAVE\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fmt ' chunk (within the RIFF/WAVE chunk)
    // zero-initialized so no field is handed to mmioCreateChunk with an indeterminate value
    MMCKINFO chunk = { 0 };
    chunk.ckid = MAKEFOURCC('f', 'm', 't', ' ');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"fmt \") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write the WAVEFORMATEX data to it (base structure plus cbSize extra bytes)
    const LONG lBytesInWfx = static_cast<LONG>(sizeof(WAVEFORMATEX) + pwfx->cbSize);
    LONG lBytesWritten =
        mmioWrite(
            hFile,
            reinterpret_cast<PCHAR>(const_cast<LPWAVEFORMATEX>(pwfx)),
            lBytesInWfx
        );
    if (lBytesWritten != lBytesInWfx) {
        // LONG is signed (mmioWrite reports failure as a negative value), so use %ld
        wprintf(L"mmioWrite(fmt data) wrote %ld bytes; expected %ld bytes", lBytesWritten, lBytesInWfx);
        return E_FAIL;
    }

    // ascend from the 'fmt ' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fmt \" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'fact' chunk whose data is (DWORD)0
    chunk.ckid = MAKEFOURCC('f', 'a', 'c', 't');
    result = mmioCreateChunk(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"fact\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // write (DWORD)0 to it as a placeholder frame count
    DWORD frames = 0;
    const LONG lFactBytes = static_cast<LONG>(sizeof(frames));
    lBytesWritten = mmioWrite(hFile, reinterpret_cast<PCHAR>(&frames), lFactBytes);
    if (lBytesWritten != lFactBytes) {
        wprintf(L"mmioWrite(fact data) wrote %ld bytes; expected %u bytes", lBytesWritten, (UINT32)sizeof(frames));
        return E_FAIL;
    }

    // ascend from the 'fact' chunk
    result = mmioAscend(hFile, &chunk, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioAscend(\"fact\" failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    // make a 'data' chunk and leave the data pointer there
    pckData->ckid = MAKEFOURCC('d', 'a', 't', 'a');
    result = mmioCreateChunk(hFile, pckData, 0);
    if (MMSYSERR_NOERROR != result) {
        wprintf(L"mmioCreateChunk(\"data\") failed: MMRESULT = 0x%08x", result);
        return E_FAIL;
    }

    return S_OK;
}

// Closes the two chunks that the header-writing step left open: first the
// 'data' chunk, then the enclosing RIFF/WAVE chunk.
//
// hFile   - multimedia file handle being written.
// pckRIFF - chunk info of the RIFF/WAVE chunk.
// pckData - chunk info of the 'data' chunk.
//
// Returns S_OK when both ascends succeed, otherwise logs and returns E_FAIL.
HRESULT FinishWaveFile(HMMIO hFile, MMCKINFO* pckRIFF, MMCKINFO* pckData) {
    // Innermost chunk first.
    const MMRESULT dataStatus = mmioAscend(hFile, pckData, 0);
    if (dataStatus != MMSYSERR_NOERROR) {
        wprintf(L"mmioAscend(\"data\" failed: MMRESULT = 0x%08x", dataStatus);
        return E_FAIL;
    }

    // Then the outer RIFF/WAVE chunk.
    const MMRESULT riffStatus = mmioAscend(hFile, pckRIFF, 0);
    if (riffStatus != MMSYSERR_NOERROR) {
        wprintf(L"mmioAscend(\"RIFF/WAVE\" failed: MMRESULT = 0x%08x", riffStatus);
        return E_FAIL;
    }

    return S_OK;
}