c - 高效的 C 比特流实现

标签 c compression codec bitstream

我目前正在研究在纯 C 中实现高效比特流的各种可能策略。我需要它来实现各种基于位的压缩算法。但是,我找不到太多关于该主题的文献,而且我似乎也找不到很多很好的例子。

这是我要找的:

  • 主要是基于宏的实现,以避免函数调用
  • 向/从 BitStream 读取/写入“n”个位数的函数。
  • 读取/写入特定位数(例如 5 位)的函数,针对通用位数进行了优化。

我想知道以下问题:

  • 应在 BitStream 中维护的变量。可以有 BYTE 指针、字节位置、当前字节中的位索引、当前字节中剩余的位数等。
  • 如何减少要维护的变量数量。我们拥有的变量越多,我们需要更新的变量就越多。
  • 如何在单个读/写操作的上下文中使用尽可能少的中间/临时变量。
  • 操作是否应该在 BYTE 级、UINT16 级或 UINT32 级完成。也许将位累加到 UINT32 中并在字节已满时写入字节(或写入完成后,使用刷新操作)比按字节执行所有操作要快得多。
  • 我们怎样才能尽可能避免循环。理想情况下,我们应该不惜一切代价避免遍历要写入 BitStream 的位数。

这可能看起来有些矫枉过正,但是当压缩中涉及的其余代码都得到了极度优化时,BitStream 部分似乎只是破坏了整个事情。例如,在图像压缩代码中使用 SIMD CPU 指令来优化部分编码过程的汇编例程并不少见,但最后一步是写入 BitStream。

想法,引用,任何人?谢谢!

最佳答案

作为记录,这是我最终得到的 BitStream 实现。它主要基于宏,并使用 32 位累加器。位存储在累加器中,从最高有效位到最低有效位。例如,要测试下一位是否已设置,可以使用 (accumulator & 0x80000000)。这使得无需大量操作 BitStream 即可进行多项测试变得非常实用。对于写入,我在累加器中累加位,并在满时自动将它们刷新到输出缓冲区。当所有位都已写入时,也可以手动触发刷新。您的里程可能会有所不同,但我对此非常满意。我已经用它来实现 Microsoft Point to Point Compression (MPPC),它使用霍夫曼编码,性能非常好。

struct _wBitStream
{
    BYTE* buffer;
    BYTE* pointer;
    DWORD position;
    DWORD length;
    DWORD capacity;
    UINT32 mask;
    UINT32 offset;
    UINT32 prefetch;
    UINT32 accumulator;
};
typedef struct _wBitStream wBitStream;

#define BitStream_Prefetch(_bs) do { \
    (_bs->prefetch) = 0; \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 4)) \
        (_bs->prefetch) |= (*(_bs->pointer + 4) << 24); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 5)) \
        (_bs->prefetch) |= (*(_bs->pointer + 5) << 16); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 6)) \
        (_bs->prefetch) |= (*(_bs->pointer + 6) << 8); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 7)) \
        (_bs->prefetch) |= (*(_bs->pointer + 7) << 0); \
} while(0)

#define BitStream_Fetch(_bs) do { \
    (_bs->accumulator) = 0; \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 0)) \
        (_bs->accumulator) |= (*(_bs->pointer + 0) << 24); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 1)) \
        (_bs->accumulator) |= (*(_bs->pointer + 1) << 16); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 2)) \
        (_bs->accumulator) |= (*(_bs->pointer + 2) << 8); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) <(_bs->capacity + 3)) \
        (_bs->accumulator) |= (*(_bs->pointer + 3) << 0); \
    BitStream_Prefetch(_bs); \
} while(0)

#define BitStream_Flush(_bs) do { \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 0)) \
        *(_bs->pointer + 0) = (_bs->accumulator >> 24); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 1)) \
        *(_bs->pointer + 1) = (_bs->accumulator >> 16); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 2)) \
        *(_bs->pointer + 2) = (_bs->accumulator >> 8); \
    if (((UINT32) (_bs->pointer - _bs->buffer)) < (_bs->capacity + 3)) \
        *(_bs->pointer + 3) = (_bs->accumulator >> 0); \
} while(0)

#define BitStream_Shift(_bs, _nbits) do { \
    _bs->accumulator <<= _nbits; \
    _bs->position += _nbits; \
    _bs->offset += _nbits; \
    if (_bs->offset < 32) { \
        _bs->mask = ((1 << _nbits) - 1); \
        _bs->accumulator |= ((_bs->prefetch >> (32 - _nbits)) & _bs->mask); \
        _bs->prefetch <<= _nbits; \
    } else { \
        _bs->mask = ((1 << _nbits) - 1); \
        _bs->accumulator |= ((_bs->prefetch >> (32 - _nbits)) & _bs->mask); \
        _bs->prefetch <<= _nbits; \
        _bs->offset -= 32; \
        _bs->pointer += 4; \
        BitStream_Prefetch(_bs); \
        if (_bs->offset) { \
            _bs->mask = ((1 << _bs->offset) - 1); \
            _bs->accumulator |= ((_bs->prefetch >> (32 - _bs->offset)) & _bs->mask); \
            _bs->prefetch <<= _bs->offset; \
        } \
    } \
} while(0)

#define BitStream_Write_Bits(_bs, _bits, _nbits) do { \
    _bs->position += _nbits; \
    _bs->offset += _nbits; \
    if (_bs->offset < 32) { \
        _bs->accumulator |= (_bits << (32 - _bs->offset)); \
    } else { \
        _bs->offset -= 32; \
        _bs->mask = ((1 << (_nbits - _bs->offset)) - 1); \
        _bs->accumulator |= ((_bits >> _bs->offset) & _bs->mask); \
        BitStream_Flush(bs); \
        _bs->accumulator = 0; \
        _bs->pointer += 4; \
        if (_bs->offset) { \
            _bs->mask = ((1 << _bs->offset) - 1); \
            _bs->accumulator |= ((_bits & _bs->mask) << (32 - _bs->offset)); \
        } \
    } \
} while(0)

void BitStream_Attach(wBitStream* bs, BYTE* buffer, UINT32 capacity)
{
    bs->position = 0;
    bs->buffer = buffer;
    bs->offset = 0;
    bs->accumulator = 0;
    bs->pointer = bs->buffer;
    bs->capacity = capacity;
    bs->length = bs->capacity * 8;
}

wBitStream* BitStream_New()
{
    wBitStream* bs = NULL;

    bs = (wBitStream*) calloc(1, sizeof(wBitStream));

    return bs;
}

void BitStream_Free(wBitStream* bs)
{
    free(bs);
}

关于c - 高效的 C 比特流实现,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/22202879/

相关文章:

c++ - 为什么libeay32中SHA512函数的符号没有前导下划线?

c - 需要帮助追踪运行时检查失败 #2 - 变量 'list' 周围的堆栈已损坏

c - XC8编译器: program returning to beginning of main()

ffmpeg - 将 WAV 转换为 TETRA 格式

image - ffmpeg在从图像生成的视频中缺少图像帧

c - 有没有办法在运行时确定可用的堆栈空间?

java - Java 中(未)压缩的 base64 编码字符串 (Android)

windows - 在命令行创建一个 tar.xz 文件

javascript - 在javascript中压缩一个blob

audio - 从哪里开始学习音频或视频编解码器?