c - 内存中 B-Tree 插入的实现

标签 c algorithm tree associative-array b-tree

我一直在创建 B 树的内存中 C 实现,因为我在网上找不到可读的代码(即这个可怕的代码:http://www.freewebs.com/attractivechaos/kbtree.h.html)。

它不太有效,因为在插入元素时偶尔会找不到以前插入的元素。此外,我不确定我的总体实现是否非常好,以及我是否以明智的方式进行插入。任何人都可以批评我所做的并找出为什么它总是不起作用吗?

显然,B 树比红黑树或 AVL 树更高效,因为每个节点的元素一起存储在内存中。这引起了我的兴趣。

请注意,“顺序”是元素的数量而不是子指针的数量。原因很简单,因为它对我来说更有意义。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CB_BTREE_ORDER 8
#define CB_BTREE_HALF_ORDER CB_BTREE_ORDER/2

typedef struct{
    void * parent;
    void * children[CB_BTREE_ORDER + 1];
    unsigned char numElements;
} CBBTreeNode;

typedef struct{
    unsigned char found;
    CBBTreeNode * node;
    unsigned char pos;
} CBFindResult;

typedef struct{
    unsigned char keySize;
    unsigned char dataSize;
    int nodeSize;
    CBBTreeNode * root;
} CBAssociativeArray;

CBFindResult CBAssociativeArrayFind(CBAssociativeArray * self, unsigned char * key);
void CBAssociativeArrayInsert(CBAssociativeArray * self, unsigned char * key, void * data, CBFindResult pos, CBBTreeNode * right);
CBFindResult CBBTreeNodeBinarySearch(CBBTreeNode * self, unsigned char * key, unsigned char keySize);
void CBInitAssociativeArray(CBAssociativeArray * self, unsigned char keySize, unsigned char dataSize);

CBFindResult CBAssociativeArrayFind(CBAssociativeArray * self, unsigned char * key){
    CBFindResult result;
    CBBTreeNode * node = self->root;
    for (;;) {
        result = CBBTreeNodeBinarySearch(node, key, self->keySize);
        if (result.found){
            result.node = node;
            return result;
        }else{
            if (node->children[result.pos])
                node = node->children[result.pos];
            else{
                result.node = node;
                return result;
            }
        }
    }
}
void CBAssociativeArrayInsert(CBAssociativeArray * self, unsigned char * key, void * data, CBFindResult pos, CBBTreeNode * right){
    // See if we can insert data in this node
    unsigned char * keys = (unsigned char *)(pos.node + 1);
    unsigned char * dataElements = keys + self->keySize * CB_BTREE_ORDER;
    if (pos.node->numElements < CB_BTREE_ORDER) {
        if (pos.node->numElements > pos.pos){
            memmove(keys + (pos.pos + 1) * self->keySize, keys + pos.pos * self->keySize, (pos.node->numElements - pos.pos) * self->keySize);
            memmove(dataElements + (pos.pos + 1) * self->dataSize, dataElements + pos.pos * self->dataSize, (pos.node->numElements - pos.pos) * self->dataSize);
            memmove(pos.node->children + pos.pos + 2, pos.node->children + pos.pos + 1, (pos.node->numElements - pos.pos) *  sizeof(*pos.node->children));
        }
        memcpy(keys + pos.pos * self->keySize, key, self->keySize);
        memcpy(dataElements + pos.pos * self->dataSize, data, self->dataSize);
        pos.node->children[pos.pos + 1] = right;
        pos.node->numElements++;
    }else{
        CBBTreeNode * new = malloc(self->nodeSize);
        unsigned char * newKeys = (unsigned char *)(new + 1);
        unsigned char * newData = newKeys + self->keySize * CB_BTREE_ORDER;
        new->numElements = CB_BTREE_HALF_ORDER;
        pos.node->numElements = CB_BTREE_HALF_ORDER;
        unsigned char * midKey;
        unsigned char * midVal;
        if (pos.pos >= CB_BTREE_HALF_ORDER) {
            if (pos.pos == CB_BTREE_HALF_ORDER) {
                memcpy(newKeys, keys + CB_BTREE_HALF_ORDER * self->keySize, CB_BTREE_HALF_ORDER * self->keySize);
                memcpy(newData, dataElements + CB_BTREE_HALF_ORDER * self->dataSize, CB_BTREE_HALF_ORDER * self->dataSize);
                memcpy(new->children + 1, pos.node->children + CB_BTREE_HALF_ORDER + 1, CB_BTREE_HALF_ORDER * sizeof(*new->children));
                new->children[0] = right;
                midKey = key;
                midVal = data;
            }else{
                if (pos.pos > CB_BTREE_HALF_ORDER + 1){
                    memcpy(newKeys, keys + (CB_BTREE_HALF_ORDER + 1) * self->keySize, (pos.pos - CB_BTREE_HALF_ORDER - 1) * self->keySize);
                    memcpy(newData, dataElements + (CB_BTREE_HALF_ORDER + 1) * self->dataSize, (pos.pos - CB_BTREE_HALF_ORDER - 1)  * self->dataSize);
                }
                memcpy(newKeys + (pos.pos - CB_BTREE_HALF_ORDER - 1) * self->keySize, key, self->keySize);
                memcpy(newData + (pos.pos - CB_BTREE_HALF_ORDER - 1) * self->dataSize, data, self->dataSize);
                memcpy(newKeys + (pos.pos - CB_BTREE_HALF_ORDER) * self->keySize, keys + pos.pos * self->keySize, (CB_BTREE_ORDER - pos.pos) * self->keySize);
                memcpy(newData + (pos.pos - CB_BTREE_HALF_ORDER) * self->dataSize, dataElements + pos.pos * self->dataSize, (CB_BTREE_ORDER - pos.pos) * self->dataSize); // o 0 i 1 ii 2 iii 3 iv
                memcpy(new->children, pos.node->children + CB_BTREE_HALF_ORDER + 1, (pos.pos - CB_BTREE_HALF_ORDER) * sizeof(*new->children));
                new->children[pos.pos - CB_BTREE_HALF_ORDER] = right;
                if (CB_BTREE_ORDER > pos.pos)
                    memcpy(new->children + pos.pos - CB_BTREE_HALF_ORDER + 1, pos.node->children + pos.pos + 1, (CB_BTREE_ORDER - pos.pos) * sizeof(*new->children));
                midKey = keys + CB_BTREE_HALF_ORDER * self->keySize;
                midVal = dataElements + CB_BTREE_HALF_ORDER * self->dataSize;
            }
        }else{
            memcpy(newKeys, keys + CB_BTREE_HALF_ORDER * self->keySize, CB_BTREE_HALF_ORDER * self->keySize);
            memcpy(newData, dataElements + CB_BTREE_HALF_ORDER * self->dataSize, CB_BTREE_HALF_ORDER * self->dataSize);
            memcpy(new->children, pos.node->children + CB_BTREE_HALF_ORDER, (CB_BTREE_HALF_ORDER + 1) * sizeof(*new->children));
            memmove(keys + (pos.pos + 1) * self->keySize, keys + pos.pos * self->keySize, (CB_BTREE_HALF_ORDER - pos.pos) * self->keySize);
            memmove(dataElements + (pos.pos + 1) * self->dataSize, dataElements + pos.pos * self->dataSize, (CB_BTREE_HALF_ORDER - pos.pos) * self->dataSize);
            if (CB_BTREE_HALF_ORDER > 1 + pos.pos)
                memmove(pos.node->children + pos.pos + 2, pos.node->children + pos.pos + 1, (CB_BTREE_HALF_ORDER - pos.pos - 1) * self->dataSize);
            memcpy(keys + pos.pos * self->keySize, key, self->keySize);
            memcpy(dataElements + pos.pos * self->dataSize, data, self->dataSize);
            pos.node->children[pos.pos + 1] = right;
            midKey = keys + CB_BTREE_HALF_ORDER * self->keySize;
            midVal = dataElements + CB_BTREE_HALF_ORDER * self->dataSize;
        }
        if ( ! pos.node->parent) {
            self->root = malloc(self->nodeSize);
            self->root->numElements = 0;
            self->root->parent = NULL;
            pos.node->parent = self->root;
            self->root->children[0] = pos.node;
        }
        new->parent = pos.node->parent;
        CBFindResult res = CBBTreeNodeBinarySearch(pos.node->parent, midKey, self->keySize);
        res.node = pos.node->parent;
        return CBAssociativeArrayInsert(self, midKey, midVal, res, new);
    }
}
CBFindResult CBBTreeNodeBinarySearch(CBBTreeNode * self, unsigned char * key, unsigned char keySize){
    CBFindResult res;
    res.found = 0;
    if ( ! self->numElements) {
        res.pos = 0;
        return res;
    }
    unsigned char left = 0;
    unsigned char right = self->numElements - 1;
    unsigned char * keys = (unsigned char *)(self + 1);
    int cmp;
    while (left <= right) {
        res.pos = (right+left)/2;
        cmp = memcmp(key, keys + res.pos * keySize, keySize);
        if (cmp == 0) {
            res.found = 1;
            break;
        }else if (cmp < 0){
            if ( ! res.pos)
                break;
            right = res.pos - 1;
        }else
            left = res.pos + 1;
    }
    if (cmp > 0)
        res.pos++;
    return res;
}
void CBInitAssociativeArray(CBAssociativeArray * self, unsigned char keySize, unsigned char dataSize){
    self->keySize = keySize;
    self->dataSize = dataSize;
    self->nodeSize = sizeof(*self->root) + (keySize + dataSize) * CB_BTREE_ORDER;
    self->root = malloc(self->nodeSize);
    self->root->parent = NULL;
    self->root->numElements = 0;
    for (unsigned char x = 0; x < CB_BTREE_ORDER + 1; x++)
        self->root->children[x] = NULL;
}

int main(){
    srand(1);
    CBAssociativeArray array;
    CBInitAssociativeArray(&array, 10, 10);
    int size = CB_BTREE_ORDER * (CB_BTREE_ORDER + 2) * 10;;
    unsigned char * keys = malloc(size);
    for (int x = 0; x < size; x++) {
        keys[x] = rand();
    }
    for (int x = 0; x < size; x += 10) {
        CBAssociativeArrayInsert(&array, keys + x, keys + x, CBAssociativeArrayFind(&array, keys + x), NULL);
        for (int y = 0; y <= x; y += 10) {
            if ( ! CBAssociativeArrayFind(&array, keys + y).found) {
                printf("RANDOM FIND FAIL %u - %u\n", y, x);
                return 1;
            }
        }
    }
    return 0;
}

谢谢。

最佳答案

问题与 memcpy 或 memmove 操作无关。我在拆分节点时忘记更改父指针。

在节点被拆分后添加这个将使其工作:

if (new->children[0])
    for (unsigned char x = 0; x < CB_BTREE_HALF_ORDER + 1; x++) {
        CBBTreeNode * child = new->children[x];
        child->parent = new;
    }

忘记添加这个是非常愚蠢的。该算法现在适用于许多随机插入。

关于c - 内存中 B-Tree 插入的实现,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/13425450/

相关文章:

c++ - 梯度和内存

c++ - 你知道学习运算符优先级和结合性的心理装置或技巧吗?

c# - C# 中的简单归并排序

通过节点找到最佳组合或路径的算法

optimization - 最适合动态语言字段访问的数据结构

c - ncurses.h 确定窗口边框

algorithm - 从 a^b 的右边找到第 k 个数字的最有效算法是什么,即 a 的幂 b

python - 什么是交叉制表的良好数据模型?

arrays - 树木: Linked Lists vs Arrays (Efficiency)

c - 在符号链接(symbolic link)的情况下安全地更新文件 - C