c++ - 如何在 Windows 10 上使用 C++ 将连续的原始音频数据记录到循环缓冲区中？

由于 Windows Multimedia 被证明完全无法录制连续音频，我得到了改用 Windows Core Audio 的提示。这里有一份手册(here)，但我不知道如何编写让录音正常工作所需的大量样板代码。谁能提供一个完整的、最小化的、将连续音频录制到循环缓冲区的实现？

到目前为止，我一直停留在下面的代码中，没有超过 pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice); 行，因为 pEnumerator 仍然是 nullptr .

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

// Opens the default render endpoint in shared-mode loopback and polls the
// capture client for packets in an endless loop. The captured data is not
// stored anywhere yet: the sink call inside the loop is commented out.
// NOTE(review): no CoInitializeEx call precedes the COM calls in this function,
// and every HRESULT is discarded. When a call fails its output pointer stays
// NULL and the following line dereferences it.
int main() {
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC; // requested buffer length: REFTIMES_PER_SEC units, i.e. one second
    UINT32 bufferFrameCount;   // filled in by GetBufferSize below
    UINT32 numFramesAvailable; // filled in by GetBuffer for each packet

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    // NOTE(review): return value ignored; pEnumerator is still NULL if this call fails.
    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    // eRender combined with AUDCLNT_STREAMFLAGS_LOOPBACK below selects the playback device as the capture source.
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    // The stream is initialized with whatever format GetMixFormat reports.
    // NOTE(review): pwfx is never freed in this function.
    pAudioClient->GetMixFormat(&pwfx);
    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer
    // (frames / frames-per-second, scaled to REFERENCE_TIME units).
    REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.

    // Each loop fills about half of the shared buffer.
    // NOTE(review): the loop has no exit condition, so the Stop() call and
    // the return statement after it are unreachable.
    while(true) {
        // Sleep for half the buffer duration (converted to milliseconds).
        Sleep(hnsActualDuration/REFTIMES_PER_MILLISEC/2);
        pCaptureClient->GetNextPacketSize(&packetLength);
        // Drain every packet that became available while sleeping.
        while(packetLength != 0) {
            // Get the available data in the shared buffer.
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }

            // Copy the available capture data to the audio sink.
            // (Disabled: pMySink is not defined in this snippet, so the packet is dropped.)
            //hr = pMySink->CopyData(pData, numFramesAvailable, &bDone);

            // Return the packet to the capture client before asking for the next one.
            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        }
    }
    pAudioClient->Stop();
    return 0;
}

编辑(2021 年 7 月 24 日):

这是我的代码更新，用于故障排除:

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#include <chrono>
/// Minimal stopwatch that reports the seconds elapsed since the last start().
class Clock {
private:
    // steady_clock is monotonic, so a measured interval can never go negative
    // or jump when the system time is adjusted.
    using clock_type = std::chrono::steady_clock;
    clock_type::time_point t; // instant of the most recent start()
public:
    /// Starts timing immediately on construction.
    Clock() { start(); }
    /// Restarts the stopwatch from the current instant.
    void start() { t = clock_type::now(); }
    /// Returns the seconds elapsed since the last start(); does not reset the stopwatch.
    double stop() const { return std::chrono::duration<double>(clock_type::now()-t).count(); }
};

const uint base        =   4096;
const uint sample_rate =  48000; // must be supported by microphone
const uint sample_size = 1*base; // must be a power of 2
const uint bandwidth   =   5000; // must be <= sample_rate/2

// History buffer of sample_size mono samples; fill() keeps the newest sample at index 0.
float* wave = new float[sample_size]; // circular buffer

/// Pushes `offset` interleaved stereo frames from `buffer` into `wave`.
///
/// Existing samples are shifted towards higher indices and the oldest ones fall
/// off the end. Each new frame is down-mixed to mono as the mean of its two
/// channels and stored in reverse order, so wave[0] is the most recent frame.
///
/// @param wave    destination holding exactly sample_size floats
/// @param buffer  at least 2*offset floats, interleaved left/right
/// @param offset  number of frames in `buffer`; values <= 0 leave `wave` untouched,
///                values above sample_size keep only the newest sample_size frames
void fill(float* const wave, const float* const buffer, int offset) {
    if(offset<=0) return; // nothing to insert
    const int capacity = (int)sample_size;
    // Never shift or write more samples than the destination can hold.
    const int count = offset<capacity ? offset : capacity;
    // Walk from the back so no sample is overwritten before it has been moved;
    // the last valid index is capacity-1.
    for(int i=capacity-1; i>=count; i--) {
        wave[i] = wave[i-count];
    }
    for(int i=0; i<count; i++) {
        const int p = offset-1-i; // newest frame first
        wave[i] = 0.5f*(buffer[2*p]+buffer[2*p+1]); // left and right channels
    }
}

int main() {
    for(uint i=0; i<sample_size; i++) wave[i] = 0.0f;
    
    Clock clock;

    #define REFTIMES_PER_SEC  10000000
    #define REFTIMES_PER_MILLISEC  10000

    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    CoInitializeEx(NULL, COINIT_MULTITHREADED);
    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    pAudioClient->GetMixFormat(&pwfx);
    
    println(pwfx->wFormatTag);// 65534
    println(WAVE_FORMAT_PCM);// 1
    println(pwfx->nChannels);// 2
    println((uint)pwfx->nSamplesPerSec);// 48000
    println(pwfx->wBitsPerSample);// 32
    println(pwfx->nBlockAlign);// 8
    println(pwfx->wBitsPerSample*pwfx->nChannels/8);// 8
    println((uint)pwfx->nAvgBytesPerSec);// 384000
    println((uint)(pwfx->nBlockAlign*pwfx->nSamplesPerSec*pwfx->nChannels));// 768000
    println(pwfx->cbSize);// 22

    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    //REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.
    
    while(running) {

        pCaptureClient->GetNextPacketSize(&packetLength); // packetLength and numFramesAvailable are either 0 or 480
        pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);

        const int offset = (uint)numFramesAvailable;
        if(offset>0) {
            fill(wave, (float*)pData, offset); // here I add pData to the circular buffer "wave"
        }

        while(packetLength != 0) {
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL); // Get the available data in the shared buffer.
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }
            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        }

        sleep(1.0/120.0-clock.stop());
        clock.start();
    }
    pAudioClient->Stop();
}

最佳答案

你没有调用 CoInitializeEx ，因此所有 COM 调用都会失败。

您还应该测试所有调用以查看它们是否返回错误。

解决评论中提出的问题:

我相信，如果您想在共享模式下操作端点，那么您必须使用 GetMixFormat 返回的参数。这意味着:

您只能使用一个采样率(除非您编写代码来执行转换，这是一项艰巨的任务)
如果您希望样本为 float ，则必须自行转换它们

要编写在所有机器上运行的代码，您必须满足混合格式向您抛出的任何问题。这可能是:

16 位整数
24 位整数 (nBlockAlign = 3)
32 位容器中的 24 位整数 (nBlockAlign = 4)
32 位整数
32 位浮点(罕见)
64 位浮点(根据我的经验，这是闻所未闻的)

样本将按照代码运行所在机器的 native 字节顺序排列，并且是交错的。

因此，请列出 pwfx 中的各种参数，并为您想要支持的每种采样格式编写相关代码。

假设您希望得到归一化到 -1 .. +1 的 float 样本，并且输入数据为 2 声道，那么对于 16 位整数可以这样做，例如:

// Converts interleaved 16-bit integer samples to floats in roughly -1 .. +1.
// pData and numFramesAvailable come from the surrounding capture code; the
// "* 2" in the loop bound is the channel count (stereo input is assumed).
const int16_t *inbuf = (const int16_t *) pData;
float *outbuf = ...;  // placeholder: the destination buffer is left to the reader

for (int i = 0; i < numFramesAvailable * 2; ++i)
{
    int16_t sample = *inbuf++;
    // Multiply by the reciprocal instead of dividing once per sample.
    // NOTE(review): with this scale factor -32768 maps to slightly below -1.0.
    *outbuf++ = (float) (sample * (1.0 / 32767));
}

请注意，我通过乘以倒数来避免(缓慢的)浮点除法(编译器将预先计算 1.0/32767)。

剩下的就交给你了。

关于c++ - 如何在 Windows 10 上使用 C++ 将连续的原始音频数据记录到循环缓冲区中？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/63456223/

c++ - 如何在 Windows 10 上使用 C++ 将连续的原始音频数据记录到循环缓冲区中？

上一篇：ios - NLTagger : enumerating tags of multiple types in one pass

下一篇：gremlin - 如何在 gremlin 中按父级分组并收集子级的所有属性值？