c++ - unordered_set 的散列自定义指针类型

标签 c++ c++11 hash unordered-set

我正在尝试对 Edge 结构进行哈希处理,以便我可以拥有具有唯一边的 unordered_set 。在我的例子中,如果一条边的两个端点的组合之前在集合中没有遇到过,则该边被认为是唯一的。

虽然我的代码适用于仅包含 Edge 类型的 unordered_set,但我无法让它适用于指向 Edge 类型的指针。请参阅下面我的有点冗长的代码。非常感谢任何帮助。

#include <iostream>
#include <iomanip>
#include <algorithm>
#include <unordered_set>

struct Vector3
{
    float x, y, z;

    Vector3() {}

    Vector3(float xx, float yy, float zz)
    {
        x = xx, y = yy, z = zz;
    }

    inline bool operator==(const Vector3& other) const
    {
        return x == other.x && y == other.y && z == other.z;
    }

    friend std::ostream& operator<<(std::ostream& stream, const Vector3& vector);
};

std::ostream& operator<<(std::ostream& stream, const Vector3& vector)
{
    return stream 
        << "(" 
        << std::setw(2) << std::setfill(' ') << vector.x << ", " 
        << std::setw(2) << std::setfill(' ') << vector.y << ", " 
        << std::setw(2) << std::setfill(' ') << vector.z 
        << ")";
}

struct Edge
{
    Vector3 EndPoints[2];

    Edge() {}

    Edge(Vector3 p, Vector3 q)
    {
        EndPoints[0] = p;
        EndPoints[1] = q;
    }

    inline bool operator==(const Edge& other) const
    {
        return  (EndPoints[0] == other.EndPoints[0] || EndPoints[0] == other.EndPoints[1]) && 
                (EndPoints[1] == other.EndPoints[1] || EndPoints[1] == other.EndPoints[0]);
    }

    inline bool operator==(const Edge* other) const
    {
        return  (EndPoints[0] == other->EndPoints[0] || EndPoints[0] == other->EndPoints[1]) && 
                (EndPoints[1] == other->EndPoints[1] || EndPoints[1] == other->EndPoints[0]);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Edge& vector);
    friend std::ostream& operator<<(std::ostream& stream, const Edge* vector);
};

std::ostream& operator<<(std::ostream& stream, const Edge& edge)
{
    return stream << edge.EndPoints[0] << " -- " << edge.EndPoints[1];
}

std::ostream& operator<<(std::ostream& stream, const Edge* edge)
{
    return stream << edge->EndPoints[0] << " -- " << edge->EndPoints[1];
}


namespace std
{
    template <>
    struct hash<Edge>
    {
        std::size_t operator()(const Edge& edge) const
        {
            Vector3 p0 = edge.EndPoints[0];
            Vector3 p1 = edge.EndPoints[1];

            if (p1.x < p0.x) std::swap(p0.x, p1.x);
            if (p1.y < p0.y) std::swap(p0.y, p1.y);
            if (p1.z < p0.z) std::swap(p0.z, p1.z);

            unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
            unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

            return hash0 ^ (hash1 << 3);
        }
    };

    template <>
    struct hash<Edge*>
    {
        std::size_t operator()(const Edge* edge) const
        {
            Vector3 p0 = edge->EndPoints[0];
            Vector3 p1 = edge->EndPoints[1];

            if (p1.x < p0.x) std::swap(p0.x, p1.x);
            if (p1.y < p0.y) std::swap(p0.y, p1.y);
            if (p1.z < p0.z) std::swap(p0.z, p1.z);

            unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
            unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

            std::size_t key = hash0 ^ (hash1 << 3);
            return key;
        }
    };
}


void add_edge(std::unordered_set<Edge>& table, Edge edge)
{
    std::unordered_set<Edge>::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}

void add_edge(std::unordered_set<Edge*>& table, Edge* edge)
{
    std::unordered_set<Edge*>::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}


void print_table(std::unordered_set<Edge>& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *it << std::endl;

    std::cout << std::endl;
}

void print_table(std::unordered_set<Edge*>& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *(*it) << std::endl;

    std::cout << std::endl;
}


int main()
{
    std::unordered_set<Edge> table;
    std::unordered_set<Edge*> ptable;

    Edge e0(Vector3( 1.f,  0.f,  0.f), Vector3(-1.f,  0.f,  0.f));
    Edge e1(Vector3(-1.f,  0.f,  0.f), Vector3( 1.f,  0.f,  0.f));

    add_edge(table, e0);
    add_edge(table, e1);

    print_table(table);

    add_edge(ptable, &e0);
    add_edge(ptable, &e1);

    print_table(ptable);

    return 0;
}

这是输出:

1>  Table already contains edge (-1,  0,  0) -- ( 1,  0,  0)
1>  
1>  Table has 1 elements:
1>  ( 1,  0,  0) -- (-1,  0,  0)
1>  
1>  Table has 2 elements:
1>  (-1,  0,  0) -- ( 1,  0,  0)
1>  ( 1,  0,  0) -- (-1,  0,  0)

所以我的问题是:为什么第二个元素添加到第二个表中?我已经检查了哈希函数,但它为两个条目返回相同的 key ,因此这似乎不是罪魁祸首,但我不确定它可能是什么。

编辑:

我现在发现 inline bool operator==(const Edge* other) const 没有被调用,但我不确定为什么。

最佳答案

Angew指出了真正的问题。

还有其他问题。看来您希望 Edges 始终是双向的,因此 Edge(a,b) == Edge(b,a)。

Side Note The best way (IMO) to achieve this, is to order the end-points in deterministic order during Edge construction. No need to think about it later. This is called an invariant and removes the burden to check for 'equivalence' of Edges throughout all the rest of the code.

但是,您的哈希函数没有正确实现这一点

您的hash<>::operator()内容如下:

    std::size_t operator()(const Edge& edge) const
    {
        Vector3 p0 = edge.EndPoints[0];
        Vector3 p1 = edge.EndPoints[1];

        if (p1.x < p0.x) std::swap(p0.x, p1.x);
        if (p1.y < p0.y) std::swap(p0.y, p1.y);
        if (p1.z < p0.z) std::swap(p0.z, p1.z);

        unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
        unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

        return hash0 ^ (hash1 << 3);
    }

这种交换逻辑有效地构成了虚假端点

Edge(ep[3,1,2], ep[1,2,3])变成Edge(ep[1,1,2], ep[3,2,3])您可能想要的地方 Edge(ep[1,2,3], ep[3,1,2]) .

修复它会交换整个端点,而不是单个 vector 元素:

if (std::tie(p1.x, p1.y, p1.z) < std::tie(p0.x, p0.y, p0.z)) {
    using std::swap;
    swap(p0, p1);
}

通过删除(所有)不必要的重复代码来修复哈希函数:

template <> struct hash<Edge>
{
    std::size_t operator()(const Edge& edge) const {
        Vector3 p0 = edge.EndPoints[0];
        Vector3 p1 = edge.EndPoints[1];

        if (std::tie(p0.x, p0.y, p0.z) < 
            std::tie(p1.x, p1.y, p1.z))  // consider`Vector3::operator<`
        {
            using std::swap;
            swap(p0, p1);
        }

        auto hash_p = [](Vector3 const& p) { return (unsigned(p.x*73856093u) ^ unsigned(p.y*19349663u) ^ unsigned(p.z*83492791u)) % 1024u; };

        return hash_p(p0) ^ (hash_p(p1) << 3);
    }
};

指针哈希就变成了单纯的转发:

template <> struct hash<Edge*> {
    std::size_t operator()(const Edge* edge) const { 
        return hash<Edge>()(*edge); 
    }
};

考虑将比较移至 Vector3::operator<

固定测试程序

实现上述内容,并修复 Edge* 缺少的相等比较器:

另请参阅 live on IdeOne

#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <cassert>
#include <tuple>

struct Vector3
{
    float x, y, z;

    Vector3() {}

    Vector3(float xx, float yy, float zz)
    {
        x = xx, y = yy, z = zz;
    }

    inline bool operator==(const Vector3& other) const
    {
        return x == other.x && y == other.y && z == other.z;
    }

    inline bool operator<(const Vector3& other) const
    {
        return std::tie(x, y, z) < std::tie(other.x, other.y, other.z);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Vector3& vector);
};

std::ostream& operator<<(std::ostream& stream, const Vector3& vector)
{
    return stream 
        << "(" 
        << std::setw(2) << std::setfill(' ') << vector.x << ", " 
        << std::setw(2) << std::setfill(' ') << vector.y << ", " 
        << std::setw(2) << std::setfill(' ') << vector.z 
        << ")";
}

struct Edge
{
    Vector3 EndPoints[2];

    Edge() {}

    Edge(Vector3 p, Vector3 q)
    {
        // swap order
        if (q < p) { using std::swap; swap(p, q); } // the invariant
        EndPoints[0] = p;
        EndPoints[1] = q;
    }

    inline bool operator==(const Edge& other) const {
        return std::tie(EndPoints[0], EndPoints[1]) == std::tie(other.EndPoints[0], other.EndPoints[1]);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Edge& vector);
    friend std::ostream& operator<<(std::ostream& stream, const Edge* vector);
};

std::ostream& operator<<(std::ostream& stream, const Edge& edge)
{
    return stream << edge.EndPoints[0] << " -- " << edge.EndPoints[1];
}

std::ostream& operator<<(std::ostream& stream, const Edge* edge)
{
    return stream << edge->EndPoints[0] << " -- " << edge->EndPoints[1];
}


namespace std
{
    template <> struct hash<Edge>
    {
        std::size_t operator()(const Edge& edge) const {
            assert(edge.EndPoints[0] < edge.EndPoints[1]); // the invariant

            auto hash_p = [](Vector3 const& p) { return (unsigned(p.x*73856093u) ^ unsigned(p.y*19349663u) ^ unsigned(p.z*83492791u)) % 1024u; };

            return hash_p(edge.EndPoints[0]) ^ (hash_p(edge.EndPoints[1]) << 3);
        }
    };

    template <> struct hash<Edge*> {
        std::size_t operator()(const Edge* edge) const { 
            return hash<Edge>()(*edge); 
        }
    };
}

struct EdgePtrEqual {
    bool operator()(Edge const* a, Edge const* b) const {
        return *a == *b;
    }
};

using EdgeSet    = std::unordered_set<Edge,  std::hash<Edge>>;
using EdgePtrSet = std::unordered_set<Edge*, std::hash<Edge*>, EdgePtrEqual>;

void add_edge(EdgeSet& table, Edge edge)
{
    EdgeSet::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}

void add_edge(EdgePtrSet& table, Edge* edge)
{
    EdgePtrSet::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}


void print_table(EdgeSet& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *it << std::endl;

    std::cout << std::endl;
}

void print_table(EdgePtrSet& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *(*it) << std::endl;

    std::cout << std::endl;
}


int main()
{
    EdgeSet table;
    EdgePtrSet ptable;

    Edge e0(Vector3( 1.f,  0.f,  0.f), Vector3(-1.f,  0.f,  0.f));
    Edge e1(Vector3(-1.f,  0.f,  0.f), Vector3( 1.f,  0.f,  0.f));

    add_edge(table, e0);
    add_edge(table, e1);

    print_table(table);

    add_edge(ptable, &e0);
    add_edge(ptable, &e1);

    print_table(ptable);

    return 0;
}

关于c++ - unordered_set 的散列自定义指针类型,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/18635145/

相关文章:

c++ - 将 std::unique_ptr 插入 boost:ptr_map

c++ - VS2010 中的前向/强枚举

ruby-on-rails - Rails - 从参数哈希中获取所有键

c++ - atomic_flag 在 C++ 上的使用

c++ - 如何将数组缩放到不同大小的新数组

python - 存储和检索大量小型非结构化消息的最快方法

c++ - 使用 C++ 进行目录导航

c++ - std::thread::join 在 libcxx 中实现在哪里

algorithm - 保序散列函数

php - 如何使用河豚散列长密码(> 72 个字符)