I have the following Bloom filter data structure, but I am not sure how to store and use the hash values computed for each hash function.
#include <openssl/evp.h>
#include <array>
#include <bitset>
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>
#include "config.h"
class Bloomfilter {
public:
explicit Bloomfilter(size_t hash_func_count = 4)
: hash_function_count(hash_func_count),
object_count_(0),
MD5_hash_result_buffer(
std::make_unique<unsigned char[]>(MD5_result_size_bytes)
) {}
void insert(const std::string &object) {
hash(object);
const auto *object_hashes =
reinterpret_cast<const uint16_t *>(MD5_hash_result_buffer.get());
for (size_t i = 0; i < hash_function_count; i++) {
const uint16_t index_to_set = object_hashes[i];
bloomfilter_store_[index_to_set] = true;
}
++object_count_;
}
void insert(unsigned int object) {
std::string object_str = std::to_string(object);
insert(object_str);
}
template <typename InputIterator>
inline void insert(const InputIterator begin, const InputIterator end)
{
InputIterator itr = begin;
while (end != itr)
{
insert(*(itr++));
}
}
private:
static constexpr size_t MD5_result_size_bytes = 16;
static constexpr size_t bloomfilter_store_size = HASH_SIZE;
static constexpr size_t bytes_per_hash_function = 2;
static void hash(const std::string &val) {
const auto *const md5_input_val =
reinterpret_cast<const unsigned char *>(val.data());
const size_t md5_input_length = val.length();
EVP_MD_CTX *mdctx;
unsigned char *md5_digest;
unsigned int md5_digest_len = EVP_MD_size(EVP_md5());
// MD5_Init
mdctx = EVP_MD_CTX_new();
EVP_DigestInit_ex(mdctx, EVP_md5(), nullptr);
// MD5_Update
EVP_DigestUpdate(mdctx, md5_input_val, md5_input_length);
// MD5_Final
md5_digest = (unsigned char *)OPENSSL_malloc(md5_digest_len);
EVP_DigestFinal_ex(mdctx, md5_digest, &md5_digest_len);
EVP_MD_CTX_free(mdctx);
}
const size_t hash_function_count;
std::bitset<bloomfilter_store_size> bloomfilter_store_;
size_t object_count_;
const std::unique_ptr<unsigned char[]> MD5_hash_result_buffer;
};
How can I compute the hash with OpenSSL's EVP API (`openssl/evp.h`) and use it to update `bloomfilter_store_`?