Hashing custom pointer type for unordered_set

2.2k views Asked by At

I am trying to hash an Edge struct so that I can have a unordered_set with unique edges. In my case an edge is considered unique if the combination of its two endpoints were not encountered in the set before.

While my code works fine for an unordered_set that just contains Edge types, I cannot get it to work for pointers to Edge types. Please see my somewhat lengthy code below. Any help is extremely appreciated.

#include <iostream>
#include <iomanip>
#include <algorithm>
#include <unordered_set>

struct Vector3
{
    float x, y, z;

    Vector3() {}

    Vector3(float xx, float yy, float zz)
    {
        x = xx, y = yy, z = zz;
    }

    inline bool operator==(const Vector3& other) const
    {
        return x == other.x && y == other.y && z == other.z;
    }

    friend std::ostream& operator<<(std::ostream& stream, const Vector3& vector);
};

std::ostream& operator<<(std::ostream& stream, const Vector3& vector)
{
    return stream 
        << "(" 
        << std::setw(2) << std::setfill(' ') << vector.x << ", " 
        << std::setw(2) << std::setfill(' ') << vector.y << ", " 
        << std::setw(2) << std::setfill(' ') << vector.z 
        << ")";
}

struct Edge
{
    Vector3 EndPoints[2];

    Edge() {}

    Edge(Vector3 p, Vector3 q)
    {
        EndPoints[0] = p;
        EndPoints[1] = q;
    }

    inline bool operator==(const Edge& other) const
    {
        return  (EndPoints[0] == other.EndPoints[0] || EndPoints[0] == other.EndPoints[1]) && 
                (EndPoints[1] == other.EndPoints[1] || EndPoints[1] == other.EndPoints[0]);
    }

    inline bool operator==(const Edge* other) const
    {
        return  (EndPoints[0] == other->EndPoints[0] || EndPoints[0] == other->EndPoints[1]) && 
                (EndPoints[1] == other->EndPoints[1] || EndPoints[1] == other->EndPoints[0]);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Edge& vector);
    friend std::ostream& operator<<(std::ostream& stream, const Edge* vector);
};

std::ostream& operator<<(std::ostream& stream, const Edge& edge)
{
    return stream << edge.EndPoints[0] << " -- " << edge.EndPoints[1];
}

std::ostream& operator<<(std::ostream& stream, const Edge* edge)
{
    return stream << edge->EndPoints[0] << " -- " << edge->EndPoints[1];
}


namespace std
{
    template <>
    struct hash<Edge>
    {
        std::size_t operator()(const Edge& edge) const
        {
            Vector3 p0 = edge.EndPoints[0];
            Vector3 p1 = edge.EndPoints[1];

            if (p1.x < p0.x) std::swap(p0.x, p1.x);
            if (p1.y < p0.y) std::swap(p0.y, p1.y);
            if (p1.z < p0.z) std::swap(p0.z, p1.z);

            unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
            unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

            return hash0 ^ (hash1 << 3);
        }
    };

    template <>
    struct hash<Edge*>
    {
        std::size_t operator()(const Edge* edge) const
        {
            Vector3 p0 = edge->EndPoints[0];
            Vector3 p1 = edge->EndPoints[1];

            if (p1.x < p0.x) std::swap(p0.x, p1.x);
            if (p1.y < p0.y) std::swap(p0.y, p1.y);
            if (p1.z < p0.z) std::swap(p0.z, p1.z);

            unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
            unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

            std::size_t key = hash0 ^ (hash1 << 3);
            return key;
        }
    };
}


void add_edge(std::unordered_set<Edge>& table, Edge edge)
{
    std::unordered_set<Edge>::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}

void add_edge(std::unordered_set<Edge*>& table, Edge* edge)
{
    std::unordered_set<Edge*>::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}


void print_table(std::unordered_set<Edge>& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *it << std::endl;

    std::cout << std::endl;
}

void print_table(std::unordered_set<Edge*>& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *(*it) << std::endl;

    std::cout << std::endl;
}


int main()
{
    std::unordered_set<Edge> table;
    std::unordered_set<Edge*> ptable;

    Edge e0(Vector3( 1.f,  0.f,  0.f), Vector3(-1.f,  0.f,  0.f));
    Edge e1(Vector3(-1.f,  0.f,  0.f), Vector3( 1.f,  0.f,  0.f));

    add_edge(table, e0);
    add_edge(table, e1);

    print_table(table);

    add_edge(ptable, &e0);
    add_edge(ptable, &e1);

    print_table(ptable);

    return 0;
}

This is the output:

1>  Table already contains edge (-1,  0,  0) -- ( 1,  0,  0)
1>  
1>  Table has 1 elements:
1>  ( 1,  0,  0) -- (-1,  0,  0)
1>  
1>  Table has 2 elements:
1>  (-1,  0,  0) -- ( 1,  0,  0)
1>  ( 1,  0,  0) -- (-1,  0,  0)

So my question is: why is the second element added to the second table? I have checked the hash function but it returns the same key for both entries, so that does not appear to be the culprit, but I'm not sure what else it could be.

Edit:

I have now found out that inline bool operator==(const Edge* other) const is not called, but I'm not sure why.

2

There are 2 answers

5
sehe On BEST ANSWER

Angew pointed out the real problem.

There are other issues. It seems that you want Edges to always be bi-directional, and therefore Edge(a,b) == Edge(b,a).

Side Note The best way (IMO) to achieve this, is to order the end-points in deterministic order during Edge construction. No need to think about it later. This is called an invariant and removes the burden to check for 'equivalence' of Edges throughout all the rest of the code.

However, your hash function don't implement this correctly

Your hash<>::operator() reads:

    std::size_t operator()(const Edge& edge) const
    {
        Vector3 p0 = edge.EndPoints[0];
        Vector3 p1 = edge.EndPoints[1];

        if (p1.x < p0.x) std::swap(p0.x, p1.x);
        if (p1.y < p0.y) std::swap(p0.y, p1.y);
        if (p1.z < p0.z) std::swap(p0.z, p1.z);

        unsigned hash0 = (int(p0.x*73856093) ^ int(p0.y*19349663) ^ int(p0.z*83492791)) % 1024;
        unsigned hash1 = (int(p1.x*73856093) ^ int(p1.y*19349663) ^ int(p1.z*83492791)) % 1024;

        return hash0 ^ (hash1 << 3);
    }

This swap logic effectively makes up bogus endpoints.

Edge(ep[3,1,2], ep[1,2,3]) becomes Edge(ep[1,1,2], ep[3,2,3]) where probably you wanted Edge(ep[1,2,3], ep[3,1,2]).

Fixing it would swap entire endpoints, instead of individual vector elements:

if (std::tie(p1.x, p1.y, p1.z) < std::tie(p0.x, p0.y, p0.z)) {
    using std::swap;
    swap(p0, p1);
}

Fixing your hash function by removing (all) unnecessarily duplicated code:

template <> struct hash<Edge>
{
    std::size_t operator()(const Edge& edge) const {
        Vector3 p0 = edge.EndPoints[0];
        Vector3 p1 = edge.EndPoints[1];

        if (std::tie(p0.x, p0.y, p0.z) < 
            std::tie(p1.x, p1.y, p1.z))  // consider`Vector3::operator<`
        {
            using std::swap;
            swap(p0, p1);
        }

        auto hash_p = [](Vector3 const& p) { return (unsigned(p.x*73856093u) ^ unsigned(p.y*19349663u) ^ unsigned(p.z*83492791u)) % 1024u; };

        return hash_p(p0) ^ (hash_p(p1) << 3);
    }
};

And the pointer hash becomes a mere forward:

template <> struct hash<Edge*> {
    std::size_t operator()(const Edge* edge) const { 
        return hash<Edge>()(*edge); 
    }
};

Consider moving the comparison into Vector3::operator<

Fixed Test Program

Implementing the above, and fixing the missing Equality comparer for Edge*:

Also see it live on IdeOne

#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <cassert>
#include <tuple>

struct Vector3
{
    float x, y, z;

    Vector3() {}

    Vector3(float xx, float yy, float zz)
    {
        x = xx, y = yy, z = zz;
    }

    inline bool operator==(const Vector3& other) const
    {
        return x == other.x && y == other.y && z == other.z;
    }

    inline bool operator<(const Vector3& other) const
    {
        return std::tie(x, y, z) < std::tie(other.x, other.y, other.z);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Vector3& vector);
};

std::ostream& operator<<(std::ostream& stream, const Vector3& vector)
{
    return stream 
        << "(" 
        << std::setw(2) << std::setfill(' ') << vector.x << ", " 
        << std::setw(2) << std::setfill(' ') << vector.y << ", " 
        << std::setw(2) << std::setfill(' ') << vector.z 
        << ")";
}

struct Edge
{
    Vector3 EndPoints[2];

    Edge() {}

    Edge(Vector3 p, Vector3 q)
    {
        // swap order
        if (q < p) { using std::swap; swap(p, q); } // the invariant
        EndPoints[0] = p;
        EndPoints[1] = q;
    }

    inline bool operator==(const Edge& other) const {
        return std::tie(EndPoints[0], EndPoints[1]) == std::tie(other.EndPoints[0], other.EndPoints[1]);
    }

    friend std::ostream& operator<<(std::ostream& stream, const Edge& vector);
    friend std::ostream& operator<<(std::ostream& stream, const Edge* vector);
};

std::ostream& operator<<(std::ostream& stream, const Edge& edge)
{
    return stream << edge.EndPoints[0] << " -- " << edge.EndPoints[1];
}

std::ostream& operator<<(std::ostream& stream, const Edge* edge)
{
    return stream << edge->EndPoints[0] << " -- " << edge->EndPoints[1];
}


namespace std
{
    template <> struct hash<Edge>
    {
        std::size_t operator()(const Edge& edge) const {
            assert(edge.EndPoints[0] < edge.EndPoints[1]); // the invariant

            auto hash_p = [](Vector3 const& p) { return (unsigned(p.x*73856093u) ^ unsigned(p.y*19349663u) ^ unsigned(p.z*83492791u)) % 1024u; };

            return hash_p(edge.EndPoints[0]) ^ (hash_p(edge.EndPoints[1]) << 3);
        }
    };

    template <> struct hash<Edge*> {
        std::size_t operator()(const Edge* edge) const { 
            return hash<Edge>()(*edge); 
        }
    };
}

struct EdgePtrEqual {
    bool operator()(Edge const* a, Edge const* b) const {
        return *a == *b;
    }
};

using EdgeSet    = std::unordered_set<Edge,  std::hash<Edge>>;
using EdgePtrSet = std::unordered_set<Edge*, std::hash<Edge*>, EdgePtrEqual>;

void add_edge(EdgeSet& table, Edge edge)
{
    EdgeSet::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}

void add_edge(EdgePtrSet& table, Edge* edge)
{
    EdgePtrSet::const_iterator it = table.find(edge);
    if (it == table.end()) table.insert(edge);
    else std::cout << "Table already contains edge " << edge << std::endl;
}


void print_table(EdgeSet& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *it << std::endl;

    std::cout << std::endl;
}

void print_table(EdgePtrSet& table)
{
    std::cout << std::endl;
    std::cout << "Table has " << table.size() << " elements:" << std::endl;

    for (auto it = table.begin(); it != table.end(); ++it)
        std::cout << *(*it) << std::endl;

    std::cout << std::endl;
}


int main()
{
    EdgeSet table;
    EdgePtrSet ptable;

    Edge e0(Vector3( 1.f,  0.f,  0.f), Vector3(-1.f,  0.f,  0.f));
    Edge e1(Vector3(-1.f,  0.f,  0.f), Vector3( 1.f,  0.f,  0.f));

    add_edge(table, e0);
    add_edge(table, e1);

    print_table(table);

    add_edge(ptable, &e0);
    add_edge(ptable, &e1);

    print_table(ptable);

    return 0;
}
1
Angew is no longer proud of SO On

Adding a specialisation of std::hash<Edge*> makes the hasher inconsistent with the equality comparison. Because the elements in the set are pointers, they're compared using normal pointer equality. Your lopsided operator==(const Edge*) would only be called for comparing an Edge to an Edge* (which is definitely not what you want).

You need to provide a comparator consistent with your hasher. The default comparator is std::equal_to<Key>. I don't think you can (or want to) specialise std::equal_to<Edge*>, so you should just provide your own comparator as the third template argument to the unordered_set.

Unrelated hint: It will require much less maintenance if you implement the pointer hasher like this:

template <>
struct hash<Edge*>
{
    std::size_t operator()(const Edge* edge) const
    {
        std::hash<Edge> h;
        return h(*edge);
    }
};