You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
202 lines
7.3 KiB
JavaScript
202 lines
7.3 KiB
JavaScript
/*
Michael Murtaugh
Following Charles Hayden's 1990s interpretation implemented in Java
(C) 2018 and released under the Free Art License 1.3
See LICENSE.txt and http://artlibre.org/licence/lal/en/

Use the accompanying eliza_script_to_json.py to prepare the rules in JSON format
*/

/**
 * Build an ELIZA-style responder from a prepared rule set.
 *
 * The rule object is read once and compiled into private lookup tables; the
 * caller's object is never modified, so the same rules can be used to create
 * several independent bots.
 *
 * @param {Object} rules
 *   rules.pre      - list of strings "word replacement ...", applied to the input tokens
 *   rules.post     - list of strings "word replacement ...", applied to text echoed back
 *   rules.synon    - list of strings "name word word ...", referenced as @name in patterns
 *   rules.keywords - list of {token, weight, rules: [{decomp, reasmb, save}]}
 *                    where reasmb is a string or a list of strings
 * @param {boolean} [debug] when truthy, each processing step is logged with console.log
 * @returns {function(string): (string|undefined)} function mapping an input
 *   sentence to a reply; the result is undefined when no rule produces one.
 */
function chatbot (rules, debug) {
    // Safety valve: a script whose "goto" rules form a cycle would otherwise never return.
    var MAX_GOTO_HOPS = 32;

    // Replies produced by "save" rules, replayed later when only xnone applies.
    var saved_statements = [];

    // Compiled tables. They are created with a null prototype so that input
    // words such as "constructor" or "toString" never resolve to members
    // inherited from Object.prototype.
    var pre_subs = null,
        post_subs = null,
        synonyms = null,
        keywords_by_token = null;

    // Wrap a list in a function that returns its items in turn, starting over
    // after the last one; the position is remembered between calls.
    function looping_iterator (items) {
        var position = 0;
        return function () {
            var ret = items[position];
            position += 1;
            if (position >= items.length) { position = 0; }
            return ret;
        };
    }

    // Compile the caller's rules into the private lookup tables.
    function process_rules (source) {
        // "word a b c" => table["word"] = ["a", "b", "c"]
        function index_substitutions (list) {
            var table = Object.create(null);
            (list || []).forEach(function (x) {
                var words = tokenize(x),
                    word = words.shift();
                table[word] = words;
            });
            return table;
        }

        pre_subs = index_substitutions(source.pre);
        post_subs = index_substitutions(source.post);

        // index synonyms by first word (the first word is itself part of the group)
        synonyms = Object.create(null);
        (source.synon || []).forEach(function (x) {
            var words = tokenize(x);
            synonyms[words[0]] = words;
        });

        // index keywords by token
        keywords_by_token = Object.create(null);
        (source.keywords || []).forEach(function (x) {
            keywords_by_token[x.token] = {
                token: x.token,
                weight: x.weight,
                rules: (x.rules || []).map(function (r) {
                    // ensure a list, and copy it so the caller's array is left alone
                    var list = Array.isArray(r.reasmb) ? r.reasmb.slice() : [r.reasmb];
                    return {
                        decomp: r.decomp,
                        save: r.save,
                        // looping iterator that preserves its state between inputs
                        reasmb: looping_iterator(list)
                    };
                })
            };
        });
    }

    // Strip every leading and trailing character that is not one of the accepted letters.
    function trimword (text) {
        return text
            .replace(/[^a-zA-Zéèàöùç]+$/, "")
            .replace(/^[^a-zA-Zéèàöùç]+/, "");
    }

    // Split text on whitespace and trim each resulting word.
    function tokenize (text) {
        return (trimword(text).split(/\s+/).map(trimword));
    }

    // Used for both pre + post substitutions. Returns a new token list in which
    // every token found in subst (compared in lower case) is replaced by its
    // replacement words; inserted words are not substituted again.
    function sub (tokens, subst) {
        var out = [];
        tokens.forEach(function (token) {
            var replacement = subst[token.toLowerCase()];
            if (Array.isArray(replacement)) {
                Array.prototype.push.apply(out, replacement);
            } else if (replacement) {
                out.push(replacement);
            } else {
                out.push(token);
            }
        });
        return out;
    }

    // Collect the keywords present in tokens, highest weight first.
    function select_keywords (tokens) {
        var ret = [],
            xnone = keywords_by_token.xnone;
        for (var i = 0, l = tokens.length; i < l; i++) {
            var rule = keywords_by_token[tokens[i].toLowerCase()];
            if (rule) { ret.push(rule); }
        }
        if (xnone && xnone.weight != 0) {
            // append xnone rule pre-sort
            ret.push(xnone);
        }
        ret.sort(function (a, b) { return b.weight - a.weight; });
        if (xnone && xnone.weight == 0) {
            // append xnone rule post-sort (ensuring it's last)
            ret.push(xnone);
        }
        return ret;
    }

    // Compile a decomposition pattern into the source of an anchored regular expression.
    //   * i @cannot *  ==>  ^(.*?)i (cannot|cant|...)(.*?)$
    // Every "*" and every "@synonym" becomes one capture group, which is what
    // the (n) placeholders of the reassembly rules refer to.
    // NOTE(review): spaces around "*" are absorbed by the star, so literal words
    // are not anchored at word boundaries ("* i *" also matches the "i" inside
    // "this") - confirm whether that is intended.
    function compile_pattern (pattern) {
        var ret = pattern
            .replace(/ *\* */g, "*") // compact spaces around stars
            .replace(/\*/g, "(.*?)")
            // global, so that every @synonym of the pattern is expanded
            .replace(/@(\w+)/g, function (match, word) {
                var syn = synonyms[word.toLowerCase()];
                if (syn) {
                    return "(" + syn.join("|") + ")";
                }
                console.log("Missing @synonym", word);
                return match;
            });
        return "^" + ret + "$";
    }

    // Match the tokens (joined by single spaces) against a decomposition
    // pattern, case-insensitively. Returns the RegExp exec result or null.
    function match_decomp (pattern, tokens) {
        var source = compile_pattern(pattern);
        if (debug) {
            console.log("compile_pattern.in", pattern);
            console.log("compile_pattern.out", source);
        }
        return new RegExp(source, "i").exec(tokens.join(" "));
    }

    // Apply the POST substitutions to a fragment of the user's input.
    function do_post (txt) {
        return sub(tokenize(txt), post_subs).join(" ");
    }

    // Fill every (n) placeholder of a reassembly rule with capture group n.
    function do_reasmb (reasmb, match) {
        if (!Array.isArray(match)) {
            return reasmb;
        }
        return reasmb.replace(/\((\d+)\)/g, function (m, n) {
            var captured = match[parseInt(n, 10)];
            // a placeholder whose group does not exist (or did not take part) expands to nothing
            if (typeof captured != "string") { return ""; }
            return do_post(captured); // apply POST substitutions here to matching input
        });
    }

    // Try the keywords in order and return the first reply produced.
    // Returns undefined when no rule of any keyword yields a reply.
    function apply_keywords (keywords, tokens) {
        for (var i = 0; i < keywords.length; i++) {
            var keyword = keywords[i];
            if ((keyword.token === "xnone") && (saved_statements.length > 0)) {
                if (debug) { console.log("using saved statement"); }
                return saved_statements.shift();
            }
            var hops = 0;
            // keyword is replaced by the target of a "goto" rule, and becomes
            // empty once the current keyword is exhausted
            while (keyword) {
                var next_keyword = null;
                if (debug) { console.log("trying keyword", keyword.token); }
                for (var ri = 0, rl = keyword.rules.length; ri < rl; ri++) {
                    var rule = keyword.rules[ri];
                    if (debug) { console.log("trying rule", rule.decomp, "(" + (ri + 1) + "/" + rl + ")"); }
                    var match = match_decomp(rule.decomp, tokens);
                    if (!match) { continue; }
                    var ra = rule.reasmb();
                    // a rule with an empty reassembly list has nothing to say
                    if (typeof ra != "string") { continue; }
                    if (rule.save) {
                        // remember the reply for later and keep looking for one to give now
                        var save = do_reasmb(ra, match);
                        if (debug) { console.log("save", save); }
                        saved_statements.push(save);
                    } else if (ra.indexOf("goto ") === 0) {
                        var goto_name = ra.slice(5);
                        if (debug) { console.log("goto", goto_name); }
                        next_keyword = keywords_by_token[goto_name] || null;
                        hops += 1;
                        if (!next_keyword) {
                            console.log("Missing goto target", goto_name);
                        } else if (hops > MAX_GOTO_HOPS) {
                            console.log("Too many goto jumps", goto_name);
                            next_keyword = null;
                        }
                        break;
                    } else {
                        if (debug) { console.log("match", match, ra); }
                        return do_reasmb(ra, match);
                    }
                }
                keyword = next_keyword;
            }
        }
    }

    // Entry point handed back to the caller: one input sentence in, one reply out.
    function process (text) {
        if (debug) { console.log("input", text); }
        // null / undefined are treated like an empty sentence
        var tokens = tokenize(text == null ? "" : String(text));
        if (debug) { console.log("tokens", tokens); }
        tokens = sub(tokens, pre_subs);
        if (debug) { console.log("pre", tokens); }
        var keywords = select_keywords(tokens);
        if (debug) { console.log("keywords", keywords.map(function (x) { return x.token; })); }
        var output = apply_keywords(keywords, tokens);
        if (debug) { console.log("output", output); }
        return output;
    }

    process_rules(rules);
    return process;
}