You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

202 lines
7.3 KiB
JavaScript

/*
Michael Murtaugh
Following Charles Hayden's 1990's interpretation implemented in Java
(C) 2018 and released under the Free Art License 1.3
See LICENSE.txt and http://artlibre.org/licence/lal/en/
Use the accompanying eliza_script_to_json.py to prepare the rules in JSON format
*/
/**
 * Build an ELIZA-style chatbot from a rule set.
 *
 * The rule set is read but never modified, so the same object can be used
 * to create several independent bots.
 *
 * @param {Object} rules - Rule set with the following members:
 *   - pre, post: arrays of strings "word replacement words..." giving the
 *     substitutions applied to the input (pre) and to the captured input
 *     fragments echoed back in a reply (post);
 *   - synon: array of strings, each a space separated list of synonyms that
 *     is referenced from patterns as "@firstword";
 *   - keywords: array of {token, weight, rules}, where every entry of rules
 *     is {decomp, reasmb, save}: decomp is a decomposition pattern, reasmb a
 *     reassembly string (or list of strings, used in rotation) and save
 *     marks replies that are stored for later instead of returned.
 * @param {boolean} [debug] - When truthy, trace processing with console.log.
 * @returns {function(string): (string|undefined)} Function mapping an input
 *   line to a reply; it returns undefined when no rule produces a reply.
 */
function chatbot (rules, debug) {
    // Upper bound on consecutive "goto" jumps while answering a single
    // input, so that a cyclic script cannot hang the caller.
    var MAX_GOTO_HOPS = 100;
    // Replies produced by "save" rules, replayed when only xnone applies.
    var saved_statements = [];
    // decomposition pattern -> {source, regex}; patterns never change, so
    // each one is compiled only once.
    var pattern_cache = Object.create(null);
    // Indexed copy of the rules, filled in by process_rules below.
    var db;

    // Pre-process the rules into lookup tables. All tables have no
    // prototype so that input words such as "constructor" or "toString"
    // cannot resolve to inherited Object members. Keys are lowercased
    // because every lookup is made with a lowercased word.
    function process_rules (source) {
        // Wrap a list in a function that returns its items in rotation,
        // remembering its position between calls.
        function looping_iterator (items) {
            var position = 0;
            function next () {
                var item = items[position];
                position = (position + 1 >= items.length) ? 0 : position + 1;
                return item;
            }
            next.items = items;
            return next;
        }
        // Index "word replacement..." lines by their first word; the value
        // is the (possibly empty) list of replacement tokens.
        function index_substitutions (lines) {
            var index = Object.create(null);
            (lines || []).forEach(function (line) {
                var words = tokenize(line),
                    word = words.shift();
                index[word.toLowerCase()] = words;
            });
            return index;
        }
        // Index synonym lists by their first word.
        var synon = Object.create(null);
        (source.synon || []).forEach(function (line) {
            var words = tokenize(line);
            synon[words[0].toLowerCase()] = words;
        });
        // Index keywords by token, copying every rule so that the rotation
        // state of its reassembly list belongs to this bot only.
        var keywords_by_token = Object.create(null);
        (source.keywords || []).forEach(function (keyword) {
            keywords_by_token[String(keyword.token).toLowerCase()] = {
                token: keyword.token,
                weight: keyword.weight,
                rules: keyword.rules.map(function (r) {
                    return {
                        decomp: r.decomp,
                        save: r.save,
                        // [].concat accepts a single string as well as a list
                        reasmb: looping_iterator([].concat(r.reasmb))
                    };
                })
            };
        });
        return {
            pre: index_substitutions(source.pre),
            post: index_substitutions(source.post),
            synon: synon,
            keywords_by_token: keywords_by_token
        };
    }

    // Strip everything that is not a (supported) letter from both ends.
    function trimword (text) {
        return text
            .replace(/[^a-zA-Zéèàöùç]+$/, "")
            .replace(/^[^a-zA-Zéèàöùç]+/, "");
    }

    // Split text on whitespace into words trimmed with trimword.
    function tokenize (text) {
        return (trimword(text).split(/\s+/).map(trimword));
    }

    // Apply a substitution table (pre or post) to tokens, in place.
    // Inserted replacement words are not substituted again.
    function sub (tokens, subst) {
        var i = 0;
        // tokens.length is re-read on every pass because a replacement may
        // lengthen or shorten the list.
        while (i < tokens.length) {
            var replacement = subst[tokens[i].toLowerCase()];
            if (replacement) {
                Array.prototype.splice.apply(tokens, [i, 1].concat(replacement));
                i += replacement.length;
            } else {
                i += 1;
            }
        }
        return tokens;
    }

    // Collect the keywords present in tokens, highest weight first. The
    // xnone keyword (when defined) is always included; with weight 0 it is
    // placed last, otherwise it is sorted along with the others.
    function select_keywords (tokens) {
        var xnone = db.keywords_by_token.xnone,
            // loose comparison kept on purpose: weights may be numeric strings
            xnone_last = xnone && xnone.weight == 0,
            selected = [];
        tokens.forEach(function (token) {
            var keyword = db.keywords_by_token[token.toLowerCase()];
            if (keyword) { selected.push(keyword); }
        });
        if (xnone && !xnone_last) { selected.push(xnone); }
        selected.sort(function (a, b) { return b.weight - a.weight; });
        if (xnone_last) { selected.push(xnone); }
        return selected;
    }

    // Compile a decomposition pattern into regular expression source.
    //   * i @desire *  ==>  ^ *(.*?) +i +(desire|want|need) +(.*?) *$
    // Every "*" and every "@synonym" becomes one capture group, numbered
    // in order of appearance. Terms are separated by mandatory spaces and
    // the result is meant to be matched against a space padded sentence
    // (see match_decomp), so literal words only match whole words.
    function compile_pattern (pattern) {
        var terms = pattern.trim().split(/\s+/).map(function (term) {
            return term
                .replace(/\*/g, "(.*?)")
                .replace(/@(\w+)/g, function (match, word) {
                    var syn = db.synon[word.toLowerCase()];
                    if (syn) {
                        return "(" + syn.join("|") + ")";
                    }
                    console.log("Missing @synonym", word);
                    return match;
                });
        });
        return "^ *" + terms.join(" +") + " *$";
    }

    // Match tokens against a decomposition pattern; returns the RegExp
    // match (groups are the matched components) or null.
    function match_decomp (pattern, tokens) {
        var compiled = pattern_cache[pattern];
        if (!compiled) {
            var source = compile_pattern(pattern);
            compiled = pattern_cache[pattern] = {
                source: source,
                regex: new RegExp(source, "i")
            };
        }
        if (debug) {
            console.log("compile_pattern.in", pattern);
            console.log("compile_pattern.out", compiled.source);
        }
        // pad with spaces so the first and last word are delimited too
        return compiled.regex.exec(" " + tokens.join(" ") + " ");
    }

    // Apply the post substitutions to a fragment of the input.
    function do_post (txt) {
        var tokens = tokenize(txt);
        tokens = sub(tokens, db.post);
        return tokens.join(" ");
    }

    // Fill every "(n)" placeholder of a reassembly string with the n-th
    // matched component, after post substitution. A placeholder without a
    // matching component is replaced by the empty string.
    function do_reasmb (reasmb, match, tokens) {
        if (!Array.isArray(match)) {
            return reasmb;
        }
        return reasmb.replace(/\((\d+)\)/g, function (m, n) {
            var part = match[parseInt(n, 10)];
            return (typeof part === "string") ? do_post(part) : "";
        });
    }

    // Try the keywords in order and return the first reply produced.
    // Returns undefined when no rule of any keyword yields a reply.
    function apply_keywords (keywords, tokens) {
        for (var i = 0; i < keywords.length; i++) {
            var keyword = keywords[i];
            if ((keyword === db.keywords_by_token.xnone) && (saved_statements.length > 0)) {
                if (debug) { console.log("using saved statement"); }
                return saved_statements.shift();
            }
            var hops = 0;
            var jumped = true;
            // repeated once for every "goto" that switches the keyword
            while (jumped) {
                jumped = false;
                if (debug) { console.log("trying keyword", keyword.token); }
                for (var ri = 0, rl = keyword.rules.length; ri < rl; ri++) {
                    var rule = keyword.rules[ri];
                    if (debug) { console.log("trying rule", rule.decomp, "(" + (ri + 1) + "/" + rl + ")"); }
                    var match = match_decomp(rule.decomp, tokens);
                    if (!match) { continue; }
                    var ra = rule.reasmb();
                    // a rule without any reassembly text cannot answer
                    if (typeof ra !== "string") { continue; }
                    if (rule.save) {
                        // remember the reply and keep looking for one to return
                        var save = do_reasmb(ra, match, tokens);
                        if (debug) { console.log("save", save); }
                        saved_statements.push(save);
                    } else if (ra.indexOf("goto ") === 0) {
                        var goto_name = ra.slice(5).trim();
                        var target = db.keywords_by_token[goto_name.toLowerCase()];
                        if (debug) { console.log("goto", goto_name); }
                        if (!target) {
                            console.log("Missing goto keyword", goto_name);
                            continue;
                        }
                        hops += 1;
                        if (hops > MAX_GOTO_HOPS) {
                            console.log("Too many goto jumps from keyword", keywords[i].token);
                            break;
                        }
                        keyword = target;
                        jumped = true;
                        break;
                    } else {
                        if (debug) { console.log("match", match, ra); }
                        return do_reasmb(ra, match, tokens);
                    }
                }
            }
        }
    }

    // Produce the reply to one line of input.
    function process (text) {
        if (debug) { console.log("input", text); }
        var tokens = tokenize(text);
        if (debug) { console.log("tokens", tokens); }
        tokens = sub(tokens, db.pre);
        if (debug) { console.log("pre", tokens); }
        var keywords = select_keywords(tokens);
        if (debug) { console.log("keywords", keywords.map(function (x) { return x.token; })); }
        var output = apply_keywords(keywords, tokens);
        if (debug) { console.log("output", output); }
        return output;
    }

    db = process_rules(rules);
    return process;
}