I have started learning string processing algorithms and wanted to implement the Rabin-Karp algorithm in C++. I have taken:
p = prime number larger than character set: 31 m = prime number for mod operations: 1e9+9
According to what I can find, there are multiple ways to implement the algorithm and I have tested two of them: increasing power as we go to higher index and decreasing power as we go to higher index.
For example on string S[0..n-1]
Let M1 = S[0]*p^0 + S[1]*p^1 + S[2]*p^2 + ... + S[n-1]*p^n-1
Let M2 = S[0]*p^n-1 + S[1]*p^n-2 + S[2]*p^n-3 ... + S[n-1]*p^0
I have implemented both the methods and could only get successful results using M2.
Code for M1:
int rabinKarpM1(string s, string t) {
int a = (int)s.size(), b = (int)t.size();
long long p = 31, m = 1e9+9;
long long powTable[a] = {0};
powTable[0] = 1;
for (int i = 1; i < a; i++) {
powTable[i] = powTable[i-1] * p % m;
}
long long hashS = 0, hashT = 0;
for (int i = 0; i < a; i++) {
hashS = (hashS + (s[i] - 'a' + 1)*powTable[i] % m) % m;
hashT = (hashT + (t[i] - 'a' + 1)*powTable[i] % m) % m;
}
if (hashS == hashT) {
bool match = true;
for (int i = 0; i < a; i++) {
if (s[i] != t[i]) {
match = false;
break;
}
}
if (match) {
return 0;
}
}
for (int i = 0; i+a-1 < b; i++) {
hashT = (hashT - (t[i] - 'a' + 1)) / p;
hashT = hashT + (t[i+a] - 'a' + 1)*powTable[a-1] % m;
hashT = hashT % m;
if (hashS == hashT) {
bool match = true;
for (int j = i+1; j < a+i+1; j++) {
if (s[j-i-1] != t[j]) {
match = false;
break;
}
}
if (match) {
return i+1;
}
}
}
return -1;
}
Code for M2:
int rabinKarpM2(string s, string t) {
int a = (int)s.size(), b = (int)t.size();
long long p = 31, m = 1e9+9;
long long powTable[a] = {0};
powTable[0] = 1;
for (int i = 1; i < a; i++) {
powTable[i] = powTable[i-1] * p % m;
}
long long hashS = 0, hashT = 0;
for (int i = 0; i < a; i++) {
hashS = (hashS + (s[i] - 'a' + 1)*powTable[a-i-1] % m) % m;
hashT = (hashT + (t[i] - 'a' + 1)*powTable[a-i-1] % m) % m;
}
if (hashS == hashT) {
bool match = true;
for (int i = 0; i < a; i++) {
if (s[i] != t[i]) {
match = false;
break;
}
}
if (match) {
return 0;
}
}
for (int i = 0; i+a-1 < b; i++) {
hashT = (hashT + m - (t[i] - 'a' + 1)*powTable[a-1] % m) % m * p;
hashT = hashT + (t[i+a] - 'a' + 1) % m;
hashT = hashT % m;
if (hashS == hashT) {
bool match = true;
for (int j = i+1; j < a+i+1; j++) {
if (s[j-i-1] != t[j]) {
match = false;
break;
}
}
if (match) {
return i+1;
}
}
}
return -1;
}
Test Input: string s = foobarfoo string t = barfoobarfoobarfoobarfoobarfoobarfoo
I have got correct results on M2 but not on M1.