ffmpeg - Why do I get clicks and blips when encoding a sound file with Java JNA?

Tags: ffmpeg jna libavcodec

I have implemented a hello-world libavcodec program using JNA to generate a wav file containing a pure 440 Hz sine wave. But when I actually run the program, the wav file contains annoying clicks and blips (compared to the pure sine wav created from the C program). What is wrong with how I am calling avcodec_encode_audio2?

Here is my Java code. All the sources are also available on GitHub if you want to try compiling it.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
import java.util.Objects;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.TargetDataLine;


public class Sin {
    /**
     * Abstract class that allows you to put the initialization and cleanup
     * code at the same place instead of separated by the big try block.
     */
    public static abstract class SharedPtr<T> implements AutoCloseable {
        public T ptr;
        public SharedPtr(T ptr) {
            this.ptr = ptr;
        }
        /**
         * Abstract override forces method to throw no checked exceptions.
         * Subclasses will call a C function that throws no exceptions.
         */
        @Override public abstract void close();
    }

    /**
     * @param args
     * @throws IOException 
     * @throws LineUnavailableException 
     */
    public static void main(String[] args) throws IOException, LineUnavailableException {
        final AvcodecLibrary avcodec = AvcodecLibrary.INSTANCE;
        final AvformatLibrary avformat = AvformatLibrary.INSTANCE;
        final AvutilLibrary avutil = AvutilLibrary.INSTANCE;
        avcodec.avcodec_register_all();
        avformat.av_register_all();
        AVOutputFormat.ByReference format = null;
        String format_name = "wav", file_url = "file:sinjava.wav";
        for (AVOutputFormat.ByReference formatIter = avformat.av_oformat_next(null); formatIter != null; formatIter = avformat.av_oformat_next(formatIter)) {
            formatIter.setAutoWrite(false);
            String iterName = formatIter.name;
            if (format_name.equals(iterName)) {
                format = formatIter;
                break;
            }
        }
        Objects.requireNonNull(format);
        System.out.format("Found format %s%n", format_name);
        AVCodec codec = avcodec.avcodec_find_encoder(format.audio_codec);  // one of AvcodecLibrary.CodecID
        Objects.requireNonNull(codec);
        codec.setAutoWrite(false);
        try (
            SharedPtr<AVFormatContext> fmtCtxPtr = new SharedPtr<AVFormatContext>(avformat.avformat_alloc_context()) {@Override public void close(){if (null!=ptr) avformat.avformat_free_context(ptr);}};
            ) {
            AVFormatContext fmtCtx = Objects.requireNonNull(fmtCtxPtr.ptr);
            fmtCtx.setAutoWrite(false);
            fmtCtx.setAutoRead(false);
            fmtCtx.oformat = format; fmtCtx.writeField("oformat");

            AVStream st = avformat.avformat_new_stream(fmtCtx, codec);
            if (null == st)
                throw new IllegalStateException();
            AVCodecContext c = st.codec;
            if (null == c)
                throw new IllegalStateException();
            st.setAutoWrite(false);
            fmtCtx.readField("nb_streams");
            st.id = fmtCtx.nb_streams - 1; st.writeField("id");
            assert st.id >= 0;
            System.out.format("New stream: id=%d%n", st.id);

            if (0 != (format.flags & AvformatLibrary.AVFMT_GLOBALHEADER)) {
                c.flags |= AvcodecLibrary.CODEC_FLAG_GLOBAL_HEADER;
            }
            c.writeField("flags");

            c.bit_rate = 64000; c.writeField("bit_rate");
            int bestSampleRate;
            if (null == codec.supported_samplerates) {
                bestSampleRate = 44100;
            } else {
                bestSampleRate = 0;
                for (int offset = 0, sample_rate = codec.supported_samplerates.getInt(0); sample_rate != 0; sample_rate = codec.supported_samplerates.getInt(4L * ++offset)) {  // Pointer.getInt takes a byte offset
                    bestSampleRate = Math.max(bestSampleRate, sample_rate);
                }
                assert bestSampleRate > 0;
            }
            c.sample_rate = bestSampleRate; c.writeField("sample_rate");
            c.channel_layout = AvutilLibrary.AV_CH_LAYOUT_STEREO; c.writeField("channel_layout");
            c.channels = avutil.av_get_channel_layout_nb_channels(c.channel_layout); c.writeField("channels");
            assert 2 == c.channels;
            c.sample_fmt = AvutilLibrary.AVSampleFormat.AV_SAMPLE_FMT_S16; c.writeField("sample_fmt");
            c.time_base.num = 1;
            c.time_base.den = bestSampleRate;
            c.writeField("time_base");
            c.setAutoWrite(false);

            AudioFormat javaSoundFormat = new AudioFormat(bestSampleRate, Short.SIZE, c.channels, true, ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN);
            DataLine.Info javaDataLineInfo = new DataLine.Info(TargetDataLine.class, javaSoundFormat);
            if (! AudioSystem.isLineSupported(javaDataLineInfo))
                throw new IllegalStateException();
            int err;
            if ((err = avcodec.avcodec_open(c, codec)) < 0) {
                throw new IllegalStateException();
            }
            assert c.channels != 0;

            AVIOContext.ByReference[] ioCtxReference = new AVIOContext.ByReference[1];
            if (0 != (err = avformat.avio_open(ioCtxReference, file_url, AvformatLibrary.AVIO_FLAG_WRITE))) {
                throw new IllegalStateException("averror " + err);
            }
            try (
                SharedPtr<AVIOContext.ByReference> ioCtxPtr = new SharedPtr<AVIOContext.ByReference>(ioCtxReference[0]) {@Override public void close(){if (null!=ptr) avutil.av_free(ptr.getPointer());}}
                ) {
                AVIOContext.ByReference ioCtx = Objects.requireNonNull(ioCtxPtr.ptr);
                fmtCtx.pb = ioCtx; fmtCtx.writeField("pb");
                int averr = avformat.avformat_write_header(fmtCtx, null);
                if (averr < 0) {
                    throw new IllegalStateException("" + averr);
                }
                st.read();  // it is modified by avformat_write_header
                System.out.format("Wrote header. fmtCtx->nb_streams=%d, st->time_base=%d/%d; st->avg_frame_rate=%d/%d%n", fmtCtx.nb_streams, st.time_base.num, st.time_base.den, st.avg_frame_rate.num, st.avg_frame_rate.den); 
                avformat.avio_flush(ioCtx);
                int frame_size = c.frame_size != 0 ? c.frame_size : 4096;
                int expectedBufferSize = frame_size * c.channels * (Short.SIZE/8);
                boolean supports_small_last_frame = c.frame_size == 0 ? true : 0 != (codec.capabilities & AvcodecLibrary.CODEC_CAP_SMALL_LAST_FRAME);
                int bufferSize = avutil.av_samples_get_buffer_size((IntBuffer)null, c.channels, frame_size, c.sample_fmt, 1);
                assert bufferSize == expectedBufferSize: String.format("expected %d; got %d", expectedBufferSize, bufferSize);
                ByteBuffer samples = ByteBuffer.allocate(expectedBufferSize);
                samples.order(ByteOrder.nativeOrder());
                int audio_time = 0;  // unit: (c.time_base) s = (1/c.sample_rate) s
                int audio_sample_count = supports_small_last_frame ?
                    3 * c.sample_rate :
                    3 * c.sample_rate / frame_size * frame_size;
                while (audio_time < audio_sample_count) {
                    int frame_audio_time = audio_time;
                    samples.clear();
                    int nb_samples_in_frame = 0;
                    // encode a single tone sound
                    for (; samples.hasRemaining() && audio_time < audio_sample_count; nb_samples_in_frame++, audio_time++) {
                        double x = 2*Math.PI*440/c.sample_rate * audio_time;
                        double y = 10000 * Math.sin(x);
                        samples.putShort((short) y);
                        samples.putShort((short) y);
                    }
                    samples.flip();
                    try (
                            SharedPtr<AVFrame> framePtr = new SharedPtr<AVFrame>(avcodec.avcodec_alloc_frame()) {@Override public void close() {if (null!=ptr) avutil.av_free(ptr.getPointer());}};
                            ) {
                        AVFrame frame = Objects.requireNonNull(framePtr.ptr);
                        frame.setAutoRead(false);  // will be an in param
                        frame.setAutoWrite(false);
                        frame.nb_samples = nb_samples_in_frame; frame.writeField("nb_samples"); // actually unused during encoding
                        // Presentation time, in AVStream.time_base units.
                        frame.pts = avutil.av_rescale_q(frame_audio_time, c.time_base, st.time_base);  // i * codec_time_base / st_time_base
                        frame.writeField("pts");

                        assert c.channels > 0;
                        int bytesPerSample = avutil.av_get_bytes_per_sample(c.sample_fmt);
                        assert bytesPerSample > 0;
                        if (0 != (err = avcodec.avcodec_fill_audio_frame(frame, c.channels, c.sample_fmt, samples, samples.capacity(), 1))) {
                            throw new IllegalStateException(""+err);
                        }
                        AVPacket packet = new AVPacket();  // one of the few structs from ffmpeg with guaranteed size
                        avcodec.av_init_packet(packet);
                        packet.size = 0;
                        packet.data = null;
                        packet.stream_index = st.index; packet.writeField("stream_index");
                        // encode the samples
                        IntBuffer gotPacket = IntBuffer.allocate(1);
                        if (0 != (err = avcodec.avcodec_encode_audio2(c, packet, frame, gotPacket))) {
                            throw new IllegalStateException("" + err);
                        } else if (0 != gotPacket.get()) {
                            packet.read();
                            averr = avformat.av_write_frame(fmtCtx, packet);
                            if (averr < 0)
                                throw new IllegalStateException("" + averr);
                        }
                        System.out.format("encoded frame: codec time = %d; pts=%d = av_rescale_q(%d,%d/%d,%d/%d) (%.02fs) contains %d samples (%.02fs); got_packet=%d; packet.size=%d%n",
                                frame_audio_time,
                                frame.pts,
                                frame_audio_time, st.codec.time_base.num,st.codec.time_base.den,st.time_base.num,st.time_base.den,
                                1.*frame_audio_time/c.sample_rate, frame.nb_samples, 1.*frame.nb_samples/c.sample_rate, gotPacket.array()[0], packet.size);
                    }
                }
                if (0 != (err = avformat.av_write_trailer(fmtCtx))) {
                    throw new IllegalStateException();
                }
                avformat.avio_flush(ioCtx);
            }
        }
        System.out.println("Done writing");
    }
}

I also rewrote it in C, and the C version works fine without any problems. But I cannot see how I am using the library differently; all the library function calls should be identical!

//! gcc --std=c99 sin.c $(pkg-config --cflags --libs libavutil libavformat libavcodec) -o sin
// sudo apt-get install libswscale-dev
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
int main(int argc, char *argv[]) {
  const char *format_name = "wav", *file_url = "file:sin.wav";
  avcodec_register_all();
  av_register_all();
  AVOutputFormat *format = NULL;
  for (AVOutputFormat *formatIter = av_oformat_next(NULL); formatIter != NULL; formatIter = av_oformat_next(formatIter)) {
    int hasEncoder = NULL != avcodec_find_encoder(formatIter->audio_codec);
    if (0 == strcmp(format_name, formatIter->name)) {
      format = formatIter;
      break;
    }
  }
  printf("Found format %s\n", format->name);
  AVCodec *codec = avcodec_find_encoder(format->audio_codec);
  if (! codec) {
    fprintf(stderr, "Could not find codec %d\n", format->audio_codec);
    exit(1);
  }
  AVFormatContext *fmtCtx = avformat_alloc_context();
  if (! fmtCtx) {
    fprintf(stderr, "error allocating AVFormatContext\n");
    exit(1);
  }
  fmtCtx->oformat = format;
  AVStream *st = avformat_new_stream(fmtCtx, codec);
  if (! st) {
    fprintf(stderr, "error allocating AVStream\n");
    exit(1);
  }
  if (fmtCtx->nb_streams != 1) {
    fprintf(stderr, "avformat_new_stream should have incremented nb_streams, but it's still %d\n", fmtCtx->nb_streams);
    exit(1);
  }
  AVCodecContext *c = st->codec;
  if (! c) {
    fprintf(stderr, "avformat_new_stream should have allocated a AVCodecContext for my stream\n");
    exit(1);
  }
  st->id = fmtCtx->nb_streams - 1;
  printf("Created stream %d\n", st->id);
  if (0 != (format->flags & AVFMT_GLOBALHEADER)) {
    c->flags |= CODEC_FLAG_GLOBAL_HEADER;
  }
  c->bit_rate = 64000;
  int bestSampleRate;
  if (NULL == codec->supported_samplerates) {
    bestSampleRate = 44100;
    printf("Setting sample rate: %d\n", bestSampleRate);
  } else {
    bestSampleRate = 0;
    for (const int *sample_rate_iter = codec->supported_samplerates; *sample_rate_iter != 0; sample_rate_iter++) {
      if (*sample_rate_iter >= bestSampleRate)
        bestSampleRate = *sample_rate_iter;
    }
    printf("Using best supported sample rate: %d\n", bestSampleRate);
  }
  c->sample_rate = bestSampleRate;
  c->channel_layout = AV_CH_LAYOUT_STEREO;
  c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
  c->time_base.num = 1;
  c->time_base.den = c->sample_rate;
  if (c->channels != 2) {
    fprintf(stderr, "av_get_channel_layout_nb_channels returned %d instead of 2\n", c->channels);
    exit(1);
  }
  c->sample_fmt = AV_SAMPLE_FMT_S16;
  int averr;
  if ((averr = avcodec_open2(c, codec, NULL)) < 0) {
    fprintf(stderr, "avcodec_open2 returned error %d\n", averr);
    exit(1);
  }
  AVIOContext *ioCtx = NULL;
  if (0 != (averr = avio_open(&ioCtx, file_url, AVIO_FLAG_WRITE))) {
    fprintf(stderr, "avio_open returned error %d\n", averr);
    exit(1);
  }
  if (ioCtx == NULL) {
    fprintf(stderr, "AVIOContext should have been set by avio_open\n");
    exit(1);
  }
  fmtCtx->pb = ioCtx;
  if (0 != (averr = avformat_write_header(fmtCtx, NULL))) {
    fprintf(stderr, "avformat_write_header returned error %d\n", averr);
    exit(1);
  }
  printf("Wrote header. fmtCtx->nb_streams=%d, st->time_base=%d/%d; st->avg_frame_rate=%d/%d\n", fmtCtx->nb_streams, st->time_base.num, st->time_base.den, st->avg_frame_rate.num, st->avg_frame_rate.den);
  int align = 1;
  int sample_size = av_get_bytes_per_sample(c->sample_fmt);
  if (sample_size != sizeof(int16_t)) {
    fprintf(stderr, "expected sample size=%zu but got %d\n", sizeof(int16_t), sample_size);
    exit(1);
  }
  int frame_size = c->frame_size != 0 ? c->frame_size : 4096;
  int bufferSize = av_samples_get_buffer_size(NULL, c->channels, frame_size, c->sample_fmt, align);
  int expectedBufferSize = frame_size * c->channels * sample_size;
  int supports_small_last_frame = c->frame_size == 0 ? 1 : 0 != (codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME);
  if (bufferSize != expectedBufferSize) {
    fprintf(stderr, "expected buffer size=%d but got %d\n", expectedBufferSize, bufferSize);
    exit(1);
  }
  int16_t *samples = (int16_t*)malloc(bufferSize);

  uint32_t audio_time = 0;  // unit: (1/c->sample_rate) s
  uint32_t audio_sample_count = supports_small_last_frame ?
    3 * c->sample_rate :
    3 * c->sample_rate / frame_size * frame_size;
  while (audio_time < audio_sample_count) {
    uint32_t frame_audio_time = audio_time; // unit: (1/c->sample_rate) s
    AVFrame *frame = avcodec_alloc_frame();
    if (frame == NULL) {
      fprintf(stderr, "avcodec_alloc_frame failed\n");
      exit(1);
    }
    for (uint32_t i = 0; i != frame_size && audio_time < audio_sample_count; i++, audio_time++) {
      samples[2*i] = samples[2*i + 1] = 10000 * sin(2*M_PI*440/c->sample_rate * audio_time);
      frame->nb_samples = i+1;  // actually unused during encoding
    }
    // frame->format = c->sample_fmt;  // unused during encoding
    frame->pts = av_rescale_q(frame_audio_time, c->time_base, st->time_base);
    if (0 != (averr = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt, (const uint8_t*)samples, bufferSize, align))) {
      fprintf(stderr, "avcodec_fill_audio_frame returned error %d\n", averr);
      exit(1);
    }
    AVPacket packet;
    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;
    int got_packet;
    if (0 != (averr = avcodec_encode_audio2(c, &packet, frame, &got_packet))) {
      fprintf(stderr, "avcodec_encode_audio2 returned error %d\n", averr);
      exit(1);
    }
    if (got_packet) {
      packet.stream_index = st->index;
      if (0 > (averr = av_write_frame(fmtCtx, &packet))) {
        fprintf(stderr, "av_write_frame returned error %d\n", averr);
        exit(1);
      } else if (averr == 1) {
        // end of stream wanted.
      }
    }
    printf("encoded frame: codec time = %u; format pts=%ld = av_rescale_q(%u,%d/%d,%d/%d) (%.02fs) contains %d samples (%.02fs); got_packet=%d; packet.size=%d\n",
        frame_audio_time,
        frame->pts,
        frame_audio_time, c->time_base.num, c->time_base.den, st->time_base.num, st->time_base.den,
        1.*frame_audio_time/c->sample_rate, frame->nb_samples, 1.*frame->nb_samples/c->sample_rate, got_packet, packet.size);
    av_free(frame);
  }
  free(samples);
  cleanupFile:
  if (0 != (averr = av_write_trailer(fmtCtx))) {
    fprintf(stderr, "av_write_trailer returned error %d\n", averr);
    exit(1);
  }

  avio_flush(ioCtx);
  avio_close(ioCtx);
  avformat_free_context(fmtCtx);
}

Best answer

The problem is that the address of a buffer created by ByteBuffer.allocate(int) is not stable across JNA function calls. Each time you call a native function, JNA copies the bytes into a temporary array just for the duration of that call. By contrast, ByteBuffer.allocateDirect(int) creates a buffer whose native pointer is stable. This is apparently a well-known pitfall of using ByteBuffer in JNA, but I had not noticed it in the fine print of Using Pointers and Arrays.
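To make the pitfall concrete, here is a minimal sketch of my own (not part of the original post; the class name BufferAddressDemo is made up) showing the difference between the two allocation methods. Native.getDirectBufferPointer is JNA's helper for retrieving a direct buffer's native address.

import java.nio.ByteBuffer;

import com.sun.jna.Native;

public class BufferAddressDemo {
    public static void main(String[] args) {
        // Direct buffer: native memory with a stable address that JNA can
        // hand straight to C, so pointers the C side keeps remain valid
        // across calls.
        ByteBuffer direct = ByteBuffer.allocateDirect(16);
        System.out.println("direct buffer address: " + Native.getDirectBufferPointer(direct));

        // Heap buffer: backed by a Java byte[]. JNA copies its contents
        // into a temporary native block for each call, so any pointer the
        // C side stores (e.g. frame->data[0]) dangles once the call returns.
        ByteBuffer heap = ByteBuffer.allocate(16);
        System.out.println("heap buffer is direct? " + heap.isDirect());
    }
}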

So the fix is simply to change the sample-buffer allocation to ByteBuffer samples = ByteBuffer.allocateDirect(expectedBufferSize);. The subsequent avcodec_fill_audio_frame call does not copy the samples; it merely points frame->data[0] at the uint8_t* address, so the samples array needs a stable address.
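Applied to the code above, that is a one-line change (a sketch assuming the rest of the encode loop from the question stays unchanged):

// Broken: heap buffer; JNA copies it on each native call, so the address
// that avcodec_fill_audio_frame stored in frame->data[0] is stale by the
// time avcodec_encode_audio2 reads it.
//ByteBuffer samples = ByteBuffer.allocate(expectedBufferSize);

// Fixed: direct buffer with a stable native address.
ByteBuffer samples = ByteBuffer.allocateDirect(expectedBufferSize);
samples.order(ByteOrder.nativeOrder());

An alternative with the same effect, if you prefer JNA's own types, is to allocate a com.sun.jna.Memory block and wrap it with getByteBuffer(0, expectedBufferSize); Memory is likewise backed by stable native storage.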

The original question, "ffmpeg - Why do I get clicks and blips when encoding a sound file with Java JNA?", can be found on Stack Overflow: https://stackoverflow.com/questions/18392817/
