C++: proper use of HASH and Collision Techniques

773 views Asked by At

Should there be a size limitation on how LARGE a Hash Table may be?

I'm a little baffled as I can see why creating too small of a Hash Table should be causing me issues? It appears that too large of a Hash Table is causing my probe to throw a Sig error? Here is my code if anyone has experience with Hash Tables. I certainly appreciate any advice you have to offer (beyond taking up knitting instead, please):

#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>

using namespace std;

struct TABLE{
  int key;
  TABLE* next;
};
const int MAX_KEYS = 5000;
const int RANDOM = 30000;

int randNUMS(int *rand);
int hashTableSize();
int HASH(int key,int listSIZE);
void threeHashMethods(int *randARRAY,int tbSIZE);
int* openAddressing(int *randARRAY,int tbSIZE);
int seperateCHAINING();
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe);
int doubleHASH(int key,int tbSIZE);
void listSEARCH(int *randARRAY,int *HT,int tbSIZE);

int main(){
  int tbSIZE = 0;
  int randARRAY[MAX_KEYS];
  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

  ///create random array of 5,000 unique int
  ///they will be of values between 1-30000
  randNUMS(randARRAY);
  ///get hash table size from user
  ///table must be larger than 6500 int
  tbSIZE = hashTableSize();
  ///driver function for all three
  ///collision resolution techniques
  threeHashMethods(randARRAY,tbSIZE);

  return 0;
}
int HASH(int key,int listSIZE){
  int address = 0;
  address = key % listSIZE;
  return address;
}
int doubleHASH(int key,int tbSIZE){
  int address = 0;
  address = (key % (tbSIZE - 2)) + 1;
  return address;
}
int hashTableSize(){
  int userCHOOSE = 0;

  cout << "Enter desired hash table size." << endl;
  cout << "NOTE: hash table size must exceed 6500: " << endl;
  cin >> userCHOOSE;
  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }
  return userCHOOSE;
}
int randNUMS(int *randARRAY){
  ///temporary fix for randARRAY array of numbers till hash is running
  int check = 0;
  int index = 0;
  int loop = 0;

  srand (time(NULL));
  for(index = 0; index < MAX_KEYS; index++){
    check = rand() % RANDOM + 1;
    while(randARRAY[loop] != 0){
      if(check == randARRAY[index]){
    check = rand() % RANDOM + 1;
    loop = 0;
      }
      loop++;
    }
    randARRAY[index] = check;
  }

  return *randARRAY;
}
void threeHashMethods(int *randARRAY,int tbSIZE){
  int *HT;


  ///this menu will allow user to select collision method
  HT = openAddressing(randARRAY,tbSIZE);
  listSEARCH(randARRAY,HT,tbSIZE);
}
int* openAddressing(int *randARRAY,int tbSIZE){
  int key = 0,
    address = 0,
    prb = 0,
    hashTABLE[tbSIZE * 2],
    *HT = hashTABLE;
  int percent = (5000.00 / tbSIZE) * 100;
  int load = (5000.00 / tbSIZE) * 10;
  int loadFACTOR = (tbSIZE * load)/10;

  if(percent > 0){

  for(int a = 0; a < tbSIZE; a++){

    hashTABLE[a] = 0;
  }

  while(randARRAY[key] != 0){
    ///get a purposed address
    ///and move through indexes
    ///in array of random int till
    ///empty index is found
    if(randARRAY[key] > tbSIZE){
    address = HASH(randARRAY[key],loadFACTOR);
    }
    ///if address is available 
    ///grab the key
    if(hashTABLE[address] == 0){
      hashTABLE[address] = randARRAY[key];
    }
    ///if a collision is the result run
    ///a linear probe until available address is found 
    else{
      address = linearPROBE(address,hashTABLE,0,tbSIZE,prb);
      hashTABLE[address] = randARRAY[key];
    }
    if(hashTABLE[address] == randARRAY[key]){
    key++;
    }
  }
  cout << key << " items loaded into a " << tbSIZE << " element hash table." << endl;
  cout << "Load Factor = " << percent << "%" << endl;
  cout << "Results from searching for 2500 items." << endl;
  }
  else{
    cout << "Load Factor is maxed out." << endl;
  }

  return HT;
}
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe){
  while(HASH[address] != probeTHIS){
    address = (address + 1);
    probe++;
    if(address >= load){
      address = 0;
    }
  }
  return address;
}
void listSEARCH(int *randARRAY,int *HT,int tbSIZE){
  int key = 0,
    address = 0,
    probe = 0,
    found = 0,
    attempts = 0;

  while(randARRAY[key] != 0){
    address =  HASH(randARRAY[key],tbSIZE);
    while(HT[address] != randARRAY[key] && attempts < tbSIZE){
      address = linearPROBE(address,HT,randARRAY[key],tbSIZE,probe);
      found++;
      attempts++;
    }
    key = key + 2;
    attempts = 0;
  }
  found = probe / found;

  cout << "Linear Probing." << endl;
  cout << probe  << " items examined ";
  cout << "(avg = " << found << " items examined per search.)" << endl;
}
1

There are 1 answers

1
Erik On

Off by one. This fills the first MAX_KEYS+1 elements of randARRAY.

  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

This asks again then uses the old userCHOOSE value. You want return hashTableSize();

  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }

Then the real problem: In openAddressing you scan while randARRAY[key] != 0. Your randARRAY isn't 0-terminated (setting tbSIZE in main will overwrite your earlier off-by-one randARRAY[5000]), so you will scan beyond 5000 keys. Then in listSearch you'll access randARRAY[key] for key larger than 5000, which means you're reading "garbage" data, e.g. a negative number. You hash it (modulus) and it remains negative. You then access HT[negative value] which crashes.

EDIT: The fix:

  int randARRAY[MAX_KEYS+1];

This will prevent the off-by-one, make your 0-terminated scan work, and therefore max key at the correct value 5000.