c++ - 用 FFMPEG 解码 pcm_s16le?

标签 c++ audio ffmpeg decoding

我在使用 ffmpeg 解码 wav 文件时遇到问题。我是新手,不太习惯。

在我的应用程序中,我需要读入音频文件并得到一个待处理的样本数组。我用 ffmpeg 写了一个函数,它接收文件路径、开始输出样本的时间位置,以及要解码的块的长度(以秒为单位)。

我没有声誉,所以我不得不创建一个 gdrive 目录,您可以在其中查看问题和我处理的文件。

这是:https://goo.gl/8KnjAj

当我尝试解码文件 harp.wav 时,一切运行正常,我可以绘制样本,如图像 plot-harp.png

该文件是一个 WAV 文件,编码为:pcm_u8, 11025 Hz, 1 channels, u8, 88 kb/s

当我尝试解码文件 demo-unprocessed.wav 时,问题就来了。它输出一系列没有意义的样本,如图 graph1-demo.jpg 所示。

该文件是一个 WAV 文件,编码为:pcm_s16le, 44100 Hz, 1 channels, s16, 705 kb/s

IDK 我的代码中的问题所在,我已经用 FFMPEG 解码前后检查了代码,它工作得非常好。

这是 dataReader.cpp 的代码:

/* Start by including the necessary */
#include "dataReader.h"
#include <cstdlib>
#include <iostream>
#include <fstream>

#ifdef __cplusplus
extern "C" {
#endif
    #include <libavcodec/avcodec.h> 
    #include <libavformat/avformat.h>
    #include <libavutil/avutil.h>
#ifdef __cplusplus 
}
#endif

using namespace std;

/* Default constructor: a new audioChunk owns no buffer and reports zero size and bitrate */
audioChunk::audioChunk()
    : data(NULL), size(0), bitrate(0)
{
}

/* Returns the chunk length in whole seconds (fractions are truncated).
 *
 * size counts bytes, while bitrate is filled by readData from the container
 * bit rate (bits per second), so the byte count is converted to bits before
 * dividing. A chunk without a usable bitrate (for example a default
 * constructed one, where bitrate is 0) reports a length of 0 instead of
 * dividing by zero.
 */
int audioChunk::getTimeLenght(){
    if(bitrate<=0) return 0;
    /* 64-bit intermediate: size*8 can exceed the range of unsigned int */
    const unsigned long long bits=static_cast<unsigned long long>(size)*8ULL;
    return static_cast<int>(bits/static_cast<unsigned long long>(bitrate));
}

/* Default constructor: a new audioChunk_dNorm owns no buffer and reports zero size and bitrate */
audioChunk_dNorm::audioChunk_dNorm()
    : data(NULL), size(0), bitrate(0)
{
}

/* Returns the chunk length in whole seconds (fractions are truncated).
 *
 * size and bitrate are copied from the source audioChunk by fillAudioChunk:
 * size is the number of source bytes (one normalized value per byte) and
 * bitrate is in bits per second, so the count is converted to bits before
 * dividing. A chunk without a usable bitrate (for example a default
 * constructed one, where bitrate is 0) reports a length of 0 instead of
 * dividing by zero.
 */
int audioChunk_dNorm::getTimeLenght(){
    if(bitrate<=0) return 0;
    /* 64-bit intermediate: size*8 can exceed the range of unsigned int */
    const unsigned long long bits=static_cast<unsigned long long>(size)*8ULL;
    return static_cast<int>(bits/static_cast<unsigned long long>(bitrate));
}

/* Normalizes the bytes of an audioChunk into doubles in the range [-1, 1].
 *
 * @ cnk = source chunk; its bitrate and size are copied into this object and
 *         one double is produced for every byte of cnk->data.
 *
 * The smallest byte value found in the chunk maps to -1 and the largest to +1
 * (linear scaling around the midpoint of the two). If every byte has the same
 * value there is no range to scale by, so all outputs are 0. A NULL or empty
 * source leaves this object empty (data == NULL, size == 0).
 *
 * Every byte is treated as one sample; the bytes are not regrouped into wider
 * samples here.
 */
void audioChunk_dNorm::fillAudioChunk(audioChunk* cnk){

    /* Release the buffer of a previous call (data starts as NULL, so this is
     * a no-op the first time) to avoid leaking it when refilled */
    delete[] data;
    data=NULL;
    size=0;
    bitrate=0;

    if(cnk!=NULL){
        bitrate=cnk->bitrate;

        if((cnk->data!=NULL)&&(cnk->size>0)){
            size=cnk->size;

            /* Find the extremes of the input */
            double min=cnk->data[0];
            double max=cnk->data[0];
            for(unsigned int i=1;i<size;i++){
                const double sample=cnk->data[i];
                if(sample>max) max=sample;
                else if(sample<min) min=sample;
            }

            const double range=max-min;
            /* Centre on the midpoint of the observed range; (max-min)/2 is
             * only the midpoint when min happens to be 0 */
            const double middle=(max+min)/2;

            data=new double[size];

            /* The former "data[i]!=255" test inspected the freshly allocated
             * output buffer, whose contents are indeterminate, so it never
             * selected anything meaningful; every sample is normalized */
            for(unsigned int i=0;i<size;i++){
                if(range>0) data[i]=2*((cnk->data[i])-middle)/range;
                else data[i]=0;
            }
        }
    }
    std::cout<<"bitrate "<<bitrate<<std::endl;
}


audioChunk readData(const char* path_name, const double start_time, const double lenght){

    /* inizialize audioChunk */
    audioChunk output;

    /* Check input times */
    if((start_time<0)||(lenght<0)) {
        cout<<"Input times should be positive";
        return output;
    }

    /* Start FFmpeg */
    av_register_all();

    /* Initialize the frame to read the data and verify memory allocation */
    AVFrame* frame = av_frame_alloc();
    if (!frame)
    {
        cout << "Error allocating the frame" << endl;
        return output;
    }

    /* Initialization of the Context, to open the file */
    AVFormatContext* formatContext = NULL;
    /* Opening the file, and check if it has opened */
    if (avformat_open_input(&formatContext, path_name, NULL, NULL) != 0)
    {
        av_frame_free(&frame);
        cout << "Error opening the file" << endl;
        return output;
    }

    /* Find the stream info, if not found, exit */
    if (avformat_find_stream_info(formatContext, NULL) < 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Error finding the stream info" << endl;
        return output;
    }

    /* Check inputs to verify time input */
    if(start_time>(formatContext->duration/1000000)){
        cout<< "Error, start_time is over file duration"<<endl;
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        return output;
    }

    /* Chunk = number of samples to output */
    long long int chunk = ((formatContext->bit_rate)*lenght/8);
    /* Start = address of sample where start to read */
    long long int start = ((formatContext->bit_rate)*start_time/8);
    /* Tot_sampl = number of the samples in the file */
    long long int tot_sampl = (formatContext->bit_rate)*(formatContext->duration)/8000000;

    /* Set the lenght of chunk to avoid segfault and to read all the file */
    if (start+chunk>tot_sampl) {chunk = tot_sampl-start;}
    if (lenght==0) {start = 0; chunk = tot_sampl;}

    /* initialize the array to output */
    output.data = new unsigned char[chunk];
    output.bitrate = formatContext->bit_rate;
    output.size=chunk;

    av_dump_format(formatContext,0,NULL,0);
    cout<<chunk<<" n of sample to read"<<endl;
    cout<<start<<" start"<<endl;
    cout<<output.bitrate<<" bitrate"<<endl;
    cout<<tot_sampl<<" total sample"<<endl;


    /* Find the audio Stream, if no audio stream are found, clean and exit */
    AVCodec* cdc = NULL;
    int streamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &cdc, 0);
    if (streamIndex < 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Could not find any audio stream in the file" << endl;
        return output;
    }

    /* Open the audio stream to read data  in audioStream */
    AVStream* audioStream = formatContext->streams[streamIndex];

    /* Initialize the codec context */
    AVCodecContext* codecContext = audioStream->codec;
    codecContext->codec = cdc;
    /* Open the codec, and verify if it has opened */
    if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
    {
        av_frame_free(&frame);
        avformat_close_input(&formatContext);
        cout << "Couldn't open the context with the decoder" << endl;
        return output;
    }

    /* Initialize buffer to store compressed packets */
    AVPacket readingPacket;
    av_init_packet(&readingPacket);


    int j=0;
    int count = 0; 

    while(av_read_frame(formatContext, &readingPacket)==0){
        if((count+readingPacket.size)>start){
            if(readingPacket.stream_index == audioStream->index){

                AVPacket decodingPacket = readingPacket;

                // Audio packets can have multiple audio frames in a single packet
                while (decodingPacket.size > 0){
                    // Try to decode the packet into a frame
                    // Some frames rely on multiple packets, so we have to make sure the frame is finished before
                    // we can use it
                    int gotFrame = 0;
                    int result = avcodec_decode_audio4(codecContext, frame, &gotFrame, &decodingPacket);

                    count += result;

                    if (result >= 0 && gotFrame)
                    {
                        decodingPacket.size -= result;
                        decodingPacket.data += result;
                        int a;

                        for(int i=0;i<result-1;i++){

                            *(output.data+j)=frame->data[0][i];

                            j++;
                            if(j>=chunk) break;
                        }

                        // We now have a fully decoded audio frame
                    }
                    else
                    {
                        decodingPacket.size = 0;
                        decodingPacket.data = NULL;
                    }
                    if(j>=chunk) break;
                }
            }              
        }else count+=readingPacket.size;

        // To prevent memory leak, must free packet.
        av_free_packet(&readingPacket);
        if(j>=chunk) break;
    }

    // Some codecs will cause frames to be buffered up in the decoding process. If the CODEC_CAP_DELAY flag
    // is set, there can be buffered up frames that need to be flushed, so we'll do that
    if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
    {
        av_init_packet(&readingPacket);
        // Decode all the remaining frames in the buffer, until the end is reached
        int gotFrame = 0;
        int a;
        int result=avcodec_decode_audio4(codecContext, frame, &gotFrame, &readingPacket);
        while (result >= 0 && gotFrame)
        {
            // We now have a fully decoded audio frame
            for(int i=0;i<result-1;i++){

                *(output.data+j)=frame->data[0][i];

                j++;
                if(j>=chunk) break;
            }
            if(j>=chunk) break;
        }
    }

    // Clean up!
    av_free(frame);
    avcodec_close(codecContext);
    avformat_close_input(&formatContext);

    cout<<"Ended Reading, "<<j<<" samples read"<<endl;
    output.size=j;
    return output;
}

这里是dataReader.h

/* 
 * File:   dataReader.h
 * Author: davide
 *
 * Created on 27 luglio 2015, 11.11
 */

#ifndef DATAREADER_H
#define DATAREADER_H

/* function that reads a file and outputs an array of samples
 * @ path_name = the path of the file to read
 * @ start_time = the position where the data reading starts, 0 = start of file;
 *                the time is in seconds, it can resolve down to 10e-6 seconds
 * @ lenght = the length of the block of data to extract,
 *            0 = read the whole file (do not use with big files);
 *            if lenght is greater than the file duration, reading continues to the end of the file.
 *            the time is in seconds, it can resolve down to 10e-6 seconds
 */

#include <stdint.h>

/* Block of raw audio bytes together with the bit rate it was read at. */
class audioChunk{
public:
    /* Creates an empty chunk */
    audioChunk();

    /* Chunk length in seconds, computed from size and bitrate */
    int getTimeLenght();

    uint8_t* data;      /* byte buffer holding the samples */
    unsigned size;      /* number of elements in data */
    int bitrate;        /* bit rate associated with the data */
};

/* Block of audio samples stored as doubles, produced from an audioChunk. */
class audioChunk_dNorm{
public:
    /* Creates an empty chunk */
    audioChunk_dNorm();

    /* Chunk length in seconds, computed from size and bitrate */
    int getTimeLenght();

    /* Fills this object with the normalized content of cnk */
    void fillAudioChunk(audioChunk* cnk);

    double* data;       /* buffer holding the normalized samples */
    unsigned size;      /* number of elements in data */
    int bitrate;        /* bit rate associated with the data */
};

/* Reads part of an audio file into an audioChunk; the parameters are
 * described in the comment at the top of this header. */
audioChunk readData(const char* path_name, const double start_time, const double lenght);

#endif  /* DATAREADER_H */

最后是应用程序的 main.cpp。

/* 
 * File:   main.cpp
 * Author: davide
 *
 * Created on 28 luglio 2015, 17.04
 */

#include <cstdlib>
#include "dataReader.h"
#include "transforms.h"
#include "tognuplot.h"
#include <fstream>
#include <iostream>

using namespace std;

/*
 * 
 */
int main(int argc, char** argv) {

    audioChunk *chunk1=new audioChunk;

    audioChunk_dNorm *normChunk1=new audioChunk_dNorm;

    *chunk1=readData("./audio/demo-unprocessed.wav",0,1);

    normChunk1->fillAudioChunk(chunk1);

    ofstream file1;
    file1.open("./file/2wave.txt", std::ofstream::trunc);
    if(file1.is_open()) {
        for(int i=0;i<chunk1->size;i++) {
            int a=chunk1->data[i];
            file1<<i<<" "<<a<<endl;
        }
    }
    else cout<<"Error opening file";

    file1.close();

    return 0;
}

我不明白为什么输出是这样的。是否有可能解码器无法把样本(pcm_s16le,16 位)转换到 FFMPEG 的 AVFrame.data 中,因为它以 uint8_t 形式存储样本?如果是这样,是否有办法让 FFMPEG 处理样本超过 8 位的音频文件?

文件 graph1-demo_good.jpg 是示例的样子,使用我制作的工作 LIBSNDFILE 应用程序提取。

编辑: 看来问题在于程序没有把解码后的数据(每个样本以小端序存放在两个 uint8_t / unsigned char 字节中)转换为目标格式(我设为 unsigned char[ ]),因为每个样本是 16 位小端序的。所以 audioChunk.data 中的数据是正确的,但我不能把它当作单个 unsigned char 来读取,而必须把每两个字节按小端序组合成一个样本来读取。

最佳答案

我用 gdb 查看了 chunk1->data 指向的内存(x/256xh 0x18dddf0,以十六进制转储前 256 个半字)。它看起来像是带符号的 16 位值,因为它以大量的 0、0xFFFF、0x0001 开头。

因此您的代码需要请求 ffmpeg 转换为特定格式。 IDK 如何最好地做到这一点,抱歉。

关于c++ - 用 FFMPEG 解码 pcm_s16le?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31730161/

相关文章:

c++ - 计算字符串中偶数偶数元音和辅音的程序

c++ - 局部变量在没有操作的情况下被改变

java - 如何使用 Android (Java) 停止后台播放所有音乐?

linux - 在 RHEL 6 x64 中配置网络音频系统 [NAS] 时出错

video - 如何在 FFMpeg 中刻录字幕 (SRT) 文件和时间码

cmd - FFMPEG 屏幕捕获输出非常差且不一致的帧速率作为没有音频的 webm

javascript - 找不到 Discord.js 音乐机器人 ffmpeg?

c++ - 如何在 linux 的 cpp 中找到监听 0.0.0.0 的服务器的网络接口(interface)?

javascript - 禁用背景音乐控制

c++ - 当构造函数为私有(private)时,如何动态创建 FMOD::Sounds?