I need to extend the Aho-Corasick algorithm to match partial dictionary entries. For that, I modified an implementation in JS and I made some tests but it does not work well yet
Here is my code:
var AhoCorasick = function (keywords) {
this._buildTables(keywords);
};
AhoCorasick.prototype._buildTables = function (keywords) {
var gotoFn = {
0: {},
};
var output = {};
var state = 0;
keywords.forEach(function (word) {
var curr = 0;
for (var i = 0; i < word.length; i++) {
var l = word[i];
if (gotoFn[curr] && l in gotoFn[curr]) {
curr = gotoFn[curr][l];
} else {
state++;
gotoFn[curr][l] = state;
gotoFn[state] = {};
curr = state;
output[state] = [];
}
}
output[curr].push(word);
});
var failure = {};
var xs = [];
// f(s) = 0 for all states of depth 1 (the ones from which the 0 state can transition to)
for (var l in gotoFn[0]) {
var state = gotoFn[0][l];
failure[state] = 0;
xs.push(state);
}
while (xs.length) {
var r = xs.shift();
// for each symbol a such that g(r, a) = s
for (var l in gotoFn[r]) {
var s = gotoFn[r][l];
xs.push(s);
// set state = f(r)
var state = failure[r];
while (state > 0 && !(l in gotoFn[state])) {
state = failure[state];
}
if (l in gotoFn[state]) {
var fs = gotoFn[state][l];
failure[s] = fs;
output[s] = output[s].concat(output[fs]);
} else {
failure[s] = 0;
}
}
}
this.gotoFn = gotoFn;
this.output = output;
this.failure = failure;
};
AhoCorasick.prototype.search = function (string) {
//console.log(this.gotoFn);
var noErrorString = '';
var state = 0;
var results = [];
var h = 0;
var init = false;
aho_loop: for (var i = 0; i < string.length; i++) {
var l = string[i];
while (state > 0 && !(l in this.gotoFn[state])) {
state = this.failure[state];
noErrorString = '';
}
if (!(l in this.gotoFn[state])) {
if (h != 0 || !init) {
h++;
init = true;
var results2 = [];
for (key in this.gotoFn) {
for (subkey in this.gotoFn[key]) {
if (subkey == l) {
results2.push(this.gotoFn[key][l]);
}
}
}
}
console.log(l);
console.log(results2);
while (h < results2.length) {
state = results2[h];
noErrorString += l;
h++;
continue aho_loop;
}
h = 0;
init = false;
continue;
}
state = this.gotoFn[state][l];
noErrorString += l;
h = 0;
init = false;
if ((noErrorString.match(/ /g) || []).length > 1) {
var foundStrs = noErrorString;
if (string !== undefined) {
if (i + 1 < string.length) {
if (!string[i + 1].match(/\w+/g)) {
if (typeof string[i - foundStrs.length] === 'undefined') {
results.push([i, foundStrs]); // + 1
} else if (!string[i - foundStrs.length].match(/\w+/g)) {
results.push([i, foundStrs]); // + 1
}
}
} else {
results.push([i, foundStrs]);
}
}
}
}
return results;
};
var ac = new AhoCorasick([
'brownie t test keyword1 hello stop',
'lele brown',
'etc',
]);
var results = ac.search('test keyword1');
When i enter this entry in the dictionary:
brownie t test keyword1 hello stop
The result is:
"test keyword1 "
So is good, but if i enter this:
brownie test test keyword1 hello stop
The result is:
"st keyword1 "
And when i enter this:
brownie t t test keyword1 hello stop
The result is empty...
Can someone help me find my error?
Thanks!