The hash function in my Bloom filter implementation is not storing the computed hash properly

44 views Asked by At

I have the following bloom filter data structure, but I am not sure how to store and use the hash computed for each hash function.

#include <openssl/evp.h>

#include <bitset>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <string>

#include "config.h"

class Bloomfilter {
public:
    explicit Bloomfilter(size_t hash_func_count = 4)
            : hash_function_count(hash_func_count),
              object_count_(0),
              MD5_hash_result_buffer(
                  std::make_unique<unsigned char[]>(MD5_result_size_bytes)
              ) {}

    void insert(const std::string &object) {
        hash(object);
        const auto *object_hashes =
            reinterpret_cast<const uint16_t *>(MD5_hash_result_buffer.get());

        for (size_t i = 0; i < hash_function_count; i++) {
            const uint16_t index_to_set = object_hashes[i];
            bloomfilter_store_[index_to_set] = true;
        }
        ++object_count_;
    }

    void insert(unsigned int object) {
        std::string object_str = std::to_string(object);
        insert(object_str);
    }

    template <typename InputIterator>
    inline void insert(const InputIterator begin, const InputIterator end)
    {
        InputIterator itr = begin;

        while (end != itr)
        {
            insert(*(itr++));
        }
    }  

private:
    static constexpr size_t MD5_result_size_bytes = 16;

    static constexpr size_t bloomfilter_store_size = HASH_SIZE;

    static constexpr size_t bytes_per_hash_function = 2;

    static void hash(const std::string &val) {
        const auto *const md5_input_val =
            reinterpret_cast<const unsigned char *>(val.data());
        const size_t md5_input_length = val.length();
        EVP_MD_CTX *mdctx;
        unsigned char *md5_digest;
        unsigned int md5_digest_len = EVP_MD_size(EVP_md5());

        // MD5_Init
        mdctx = EVP_MD_CTX_new();
        EVP_DigestInit_ex(mdctx, EVP_md5(), nullptr);

        // MD5_Update
        EVP_DigestUpdate(mdctx, md5_input_val, md5_input_length);

        // MD5_Final
        md5_digest = (unsigned char *)OPENSSL_malloc(md5_digest_len);
        EVP_DigestFinal_ex(mdctx, md5_digest, &md5_digest_len);
        EVP_MD_CTX_free(mdctx);
    }

    const size_t hash_function_count;

    std::bitset<bloomfilter_store_size> bloomfilter_store_;
    size_t object_count_;

    const std::unique_ptr<unsigned char[]> MD5_hash_result_buffer;
};

How can I compute a hash using openssl/evp and update bloomfilter_store_?

0

There are 0 answers