Java合并具有延迟和重叠的音频文件

我的目的并不是录制系统的音频输出，也不是简单地合并音频文件。

我需要一个“AudioMerger”类。它应该有一个方法 merge，其中给出了总长度。然后它应该创建一个该长度的无声 wav 文件并向其中添加指定的声音。添加的声音可以重叠并且可以有偏移

示例: sound.wav 的长度为 3 秒

// Desired usage: sound.wav (3 seconds long) is added at offsets of 2, 6 and 7 seconds.
Merger merger = new Merger();
merger.add("sound.wav", 2);
merger.add("sound.wav", 6);
merger.add("sound.wav", 7);

// Creates a 10-second WAV file with the contents of sound.wav inserted at the offsets given above.
merger.merge(10);
merger.saveToFile(new File("out.wav"));

最佳答案

感谢 https://stackoverflow.com/users/740553/mike-pomax-kamermans 的帮助，我现在有了可以正常工作的代码。

whistle.wav: https://voca.ro/1iqDr3yVZ6uG
out.wav: https://voca.ro/1jxlHkNUuH9r

主要问题是创建一个空的 wav 文件。为了实现这一点，我需要在文件开头写入一个正确的文件头(header)。您可以在此处阅读有关 .wav 文件头的详细信息: http://www.topherlee.com/software/pcm-tut-wavformat.html
在实现的过程中，我在按小端/大端字节序(Little/Big Endian)读写字节时遇到了困难。字节序决定了数字各字节的存储顺序。Big Endian 的存储顺序与我们平时书写数字以及 Java 的方式相同(从左到右)，而 Little Endian 则相反(从右到左)。wav 文件要求其中所有数字都采用 Little Endian 格式。因此，从 wav 文件读取数字时，需要把它们转换为 Big Endian(即 Java 使用的格式)；写入文件时，则需要再转换回 Little Endian。为此，可以使用 Integer.reverseBytes() 和 Short.reverseBytes() 方法。

OneHundredTwo:
大端(Big Endian):102
小端(Little Endian):201

我遇到的另一个困难出现在合并音频字节数组时。起初我把数组中的每个字节逐一相加并计算平均值。然而我的 SampleSize 是 16 位，所以需要以每两个字节(一个采样)为单位计算平均值，而不是对每个字节单独计算。

刚开始能运行时，在我插入的音频播放之前总会出现奇怪的噪音。原因是我不小心把整个文件的内容都填进了字节数组。合并时，我的程序把文件头(header)数据也一并合并了，并把它们当作声音数据来解释，于是产生了这种噪音。去掉文件头之后，我的音频听起来就正常了。

但是，当我的流重叠时，它们会产生大量前景噪音。原因是计算平均值时，我没有把除数转换为 float，整数除法会截断一部分音频数据: 3/2 会变成 1，而不是先得到 1.5 再四舍五入为 2。

我实际上做对的事情是确保我的音频只能以可被二整除的偏移量插入。否则，它将合并前一个幅度的第一个字节与下一个幅度的最后一个字节。

import java.io.File;
import java.io.IOException;

import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.UnsupportedAudioFileException;

/**
 * Demo entry point: mixes three copies of whistle.wav into a ten second track.
 */
public class Main {

    /**
     * Loads whistle.wav, schedules it at 2, 5 and 5.5 seconds, merges everything into a
     * 10 second buffer and writes the result to out.wav.
     *
     * @param args unused
     * @throws IOException if reading whistle.wav or writing out.wav fails
     */
    public static void main(String[] args) throws IOException, UnsupportedAudioFileException, LineUnavailableException {
        final double[] startTimes = {2, 5, 5.5};
        final File input = new File("whistle.wav");
        final File output = new File("out.wav");

        AudioMerger mixer = new AudioMerger();
        MergeSound whistle = new MergeSound(input);

        // The same sound object is scheduled several times, once per start time.
        for (double start : startTimes) {
            mixer.addSound(start, whistle);
        }

        mixer.merge(10);
        mixer.saveToFile(output);
    }
}

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

/**
 * Raw sample data of a WAV file together with the format fields read from its header.
 *
 * <p>The file is assumed to start with the canonical 44-byte RIFF/WAVE header (a 16-byte
 * "fmt " chunk directly followed by the "data" chunk). Every byte after those 44 bytes is
 * treated as sample data; files with additional chunks are not recognised.
 */
public class MergeSound {

    /** Number of header bytes that precede the sample data. */
    private static final int HEADER_SIZE = 44;

    private short audioFormat;
    private int sampleRate;
    private short sampleSize;
    private short channels;

    private final ByteBuffer buffer;

    /**
     * Reads the header fields and the sample data of the given WAV file.
     *
     * @param file the WAV file to load
     * @throws IOException if the file cannot be opened or read, is shorter than the
     *         44-byte header, or is too large to be held in a single byte array
     */
    public MergeSound(File file) throws IOException {
        byte[] sound;

        // try-with-resources: the stream is closed even when reading fails.
        try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
            // Use the real file length; available() is only an estimate of what can be
            // read without blocking and is not guaranteed to be the remaining file size.
            long dataLength = file.length() - HEADER_SIZE;
            if (dataLength < 0) {
                throw new IOException("File is shorter than the " + HEADER_SIZE + " byte WAV header: " + file);
            }
            if (dataLength > Integer.MAX_VALUE) {
                throw new IOException("File is too large to load into memory: " + file);
            }
            sound = new byte[(int) dataLength];

            // WAV stores numbers little-endian while DataInputStream reads big-endian,
            // hence the reverseBytes() calls.
            in.skipNBytes(20); // "RIFF", chunk size, "WAVE", "fmt ", fmt chunk size
            audioFormat = Short.reverseBytes(in.readShort());
            channels = Short.reverseBytes(in.readShort());
            sampleRate = Integer.reverseBytes(in.readInt());
            in.skipNBytes(6); // byte rate (4 bytes) and block align (2 bytes)
            sampleSize = Short.reverseBytes(in.readShort());
            in.skipNBytes(8); // "data" and data size; skipping the full header avoids noise

            // readFully() keeps reading until the array is filled; a plain read() may
            // return after only part of the data has been transferred.
            in.readFully(sound);
        }

        buffer = ByteBuffer.wrap(sound);
    }

    /** Returns the buffer wrapping the sample bytes (everything after the header). */
    public ByteBuffer getBuffer() {
        return buffer;
    }

    /** Returns the audio format code read from the header. */
    public short getAudioFormat() {
        return audioFormat;
    }

    public void setAudioFormat(short audioFormat) {
        this.audioFormat = audioFormat;
    }

    /** Returns the sample rate read from the header. */
    public int getSampleRate() {
        return sampleRate;
    }

    public void setSampleRate(int sampleRate) {
        this.sampleRate = sampleRate;
    }

    /** Returns the bits-per-sample value read from the header. */
    public short getSampleSize() {
        return sampleSize;
    }

    public void setSampleSize(short sampleSize) {
        this.sampleSize = sampleSize;
    }

    /** Returns the channel count read from the header. */
    public short getChannels() {
        return channels;
    }

    public void setChannels(short channels) {
        this.channels = channels;
    }

}


import static java.lang.Math.ceil;
import static java.lang.Math.round;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;

/**
 * Mixes several {@link MergeSound}s, each placed at its own time offset, into one track of
 * a given length and writes the result as a WAV file.
 *
 * <p>The output format is fixed: PCM (format code 1), 44100 Hz, 16 bit, 2 channels. Where
 * sounds overlap, the output sample is the rounded average of all overlapping samples;
 * where no sound plays, the output is silence (zero).
 */
public class AudioMerger {

    /** Size in bytes of the header written by {@link #saveToFile(File)}. */
    private static final int HEADER_SIZE = 44;
    /** The mixing code reads and writes 16-bit samples, i.e. two bytes each. */
    private static final int BYTES_PER_SAMPLE = 2;

    private final short audioFormat = 1;
    private final int sampleRate = 44100;
    private final short sampleSize = 16;
    private final short channels = 2;
    private final short blockAlign = (short) (sampleSize * channels / 8);
    private final int byteRate = sampleRate * sampleSize * channels / 8;
    private ByteBuffer audioBuffer;
    private final ArrayList<MergeSound> sounds = new ArrayList<>();
    private final ArrayList<Integer> offsets = new ArrayList<>();

    /**
     * Schedules a sound to start at the given offset. Nothing is mixed until
     * {@link #merge(double)} is called. The same sound may be added more than once.
     *
     * @param offsetInSeconds start time of the sound within the merged track; must be
     *        zero or positive
     * @param sound the sound to add; its format must match this merger's format
     * @throws IllegalArgumentException if the offset is negative or NaN, or the sound's
     *         audio format, sample rate, sample size or channel count differs
     */
    public void addSound(double offsetInSeconds, MergeSound sound) {

        // The exceptions must actually be thrown; merely constructing them has no effect.
        if (sound.getAudioFormat() != audioFormat)
            throw new IllegalArgumentException("Incompatible AudioFormat");
        if (sound.getSampleRate() != sampleRate)
            throw new IllegalArgumentException("Incompatible SampleRate");
        if (sound.getSampleSize() != sampleSize)
            throw new IllegalArgumentException("Incompatible SampleSize");
        if (sound.getChannels() != channels)
            throw new IllegalArgumentException("Incompatible amount of Channels");
        if (!(offsetInSeconds >= 0))
            throw new IllegalArgumentException("Offset must not be negative: " + offsetInSeconds);

        sounds.add(sound);
        // Start on a whole frame (one sample per channel) so that samples are never split
        // in the middle and the channels of the added sound stay in their positions.
        offsets.add(alignToFrame(secondsToByte(offsetInSeconds)));
    }

    /**
     * Mixes all added sounds into a new buffer of the given duration. Parts of a sound
     * that would play after the end of the track are dropped. The sounds' own buffers are
     * only read with absolute indices, so their positions are left untouched.
     *
     * <p>Uses two temporary {@code int} arrays with one entry per output sample.
     *
     * @param durationInSeconds length of the merged track; must be zero or positive
     * @throws IllegalArgumentException if the duration is negative or NaN
     */
    public void merge(double durationInSeconds) {
        if (!(durationInSeconds >= 0))
            throw new IllegalArgumentException("Duration must not be negative: " + durationInSeconds);

        int totalBytes = alignToFrame(secondsToByte(durationInSeconds));
        int totalSamples = totalBytes / BYTES_PER_SAMPLE;

        // Accumulate in int: adding two 16-bit samples can exceed the range of a short.
        int[] sums = new int[totalSamples];
        // Number of sounds contributing to each sample. A separate count (instead of
        // testing the output for zero) also handles samples whose value really is zero.
        int[] counts = new int[totalSamples];

        for (int i = 0; i < sounds.size(); i++) {
            ByteBuffer data = sounds.get(i).getBuffer();
            int firstSample = offsets.get(i) / BYTES_PER_SAMPLE;

            // Clip to the end of the track; a trailing odd byte of the sound is ignored.
            // A sound starting after the end yields a negative count and is skipped,
            // without stopping the processing of the remaining sounds.
            int sampleCount = Math.min(data.capacity() / BYTES_PER_SAMPLE, totalSamples - firstSample);

            for (int s = 0; s < sampleCount; s++) {
                // WAV data is little-endian, ByteBuffer reads big-endian by default.
                sums[firstSample + s] += Short.reverseBytes(data.getShort(s * BYTES_PER_SAMPLE));
                counts[firstSample + s]++;
            }
        }

        ByteBuffer merged = ByteBuffer.allocate(totalBytes); // zero-filled, i.e. silence
        for (int s = 0; s < totalSamples; s++) {
            if (counts[s] > 0) {
                // Floating-point division so that e.g. 3 / 2 rounds to 2 instead of
                // being truncated to 1.
                short average = (short) round(sums[s] / (double) counts[s]);
                merged.putShort(s * BYTES_PER_SAMPLE, Short.reverseBytes(average));
            }
        }
        audioBuffer = merged;
    }

    /** Converts a time span to a byte count at this merger's byte rate, rounding up. */
    private int secondsToByte(double seconds) {
        return (int) ceil(seconds * byteRate);
    }

    /** Rounds a byte count up to the next multiple of the frame size (blockAlign). */
    private int alignToFrame(int byteCount) {
        int remainder = byteCount % blockAlign;
        return remainder == 0 ? byteCount : byteCount + (blockAlign - remainder);
    }

    /**
     * Writes the merged audio, preceded by a 44-byte WAV header, to the given file.
     *
     * @param file destination file
     * @throws IOException if the file cannot be opened or written
     * @throws IllegalStateException if {@link #merge(double)} has not been called yet
     */
    public void saveToFile(File file) throws IOException {
        if (audioBuffer == null)
            throw new IllegalStateException("merge() must be called before saveToFile()");

        byte[] audioData = audioBuffer.array();

        int audioSize = audioData.length;
        // The RIFF chunk size counts everything after the 8-byte "RIFF" + size prefix,
        // i.e. the total file size minus 8.
        int riffChunkSize = audioSize + HEADER_SIZE - 8;

        // try-with-resources: the stream is closed even when a write fails.
        // All numbers are byte-reversed because WAV is little-endian while
        // DataOutputStream writes big-endian.
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(file))) {
            out.writeBytes("RIFF");                               // bytes 0-3: chunk id
            out.writeInt(Integer.reverseBytes(riffChunkSize));    // 4-7: chunk size
            out.writeBytes("WAVE");                               // 8-11: format
            out.writeBytes("fmt ");                               // 12-15: sub-chunk 1 id (trailing space)
            out.writeInt(Integer.reverseBytes(16));               // 16-19: sub-chunk 1 size
            out.writeShort(Short.reverseBytes(audioFormat));      // 20-21: audio format, 1 = PCM
            out.writeShort(Short.reverseBytes(channels));         // 22-23: channel count
            out.writeInt(Integer.reverseBytes(sampleRate));       // 24-27: sample rate
            out.writeInt(Integer.reverseBytes(byteRate));         // 28-31: byte rate
            out.writeShort(Short.reverseBytes(blockAlign));       // 32-33: block align
            out.writeShort(Short.reverseBytes(sampleSize));       // 34-35: bits per sample
            out.writeBytes("data");                               // 36-39: sub-chunk 2 id
            out.writeInt(Integer.reverseBytes(audioSize));        // 40-43: sub-chunk 2 size

            out.write(audioData);                                 // the merged samples
        }
    }

}

关于Java合并具有延迟和重叠的音频文件，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/70525829/

Java合并具有延迟和重叠的音频文件

上一篇：python-3.x - 连接多个具有相同列名的 CSV

下一篇：edgedb - 基于非必需属性的存在的计算属性