/*
    Michael Murtaugh

    Following Charles Hayden's 1990s interpretation implemented in Java

    (C) 2018 and released under the Free Art License 1.3
    See LICENSE.txt and http://artlibre.org/licence/lal/en/

    Use the accompanying eliza_script_to_json.py to prepare the rules in JSON format
*/
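/*
    A minimal usage sketch (not part of this file; the file name "rules.json"
    and the element ids are hypothetical): load the JSON produced by
    eliza_script_to_json.py and wire the bot to an input textarea and an
    output element.

        fetch("rules.json")
            .then(function (response) { return response.json(); })
            .then(function (rules) {
                chatbot({
                    rules: rules,
                    input: document.getElementById("input"),
                    output: document.getElementById("output"),
                    debug: false
                });
            });
*/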
function chatbot (opts) {
    var rules = opts.rules,
        textarea = opts.input,
        display = opts.output,
        debug = opts.debug || false,
        autoscroll = opts.autoscroll === undefined ? true : opts.autoscroll;

    var saved_statements = [];
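    /*
        Sketch of the rules object this code expects (inferred from the code
        below, not from the JSON exporter itself; the example entries are
        hypothetical):

        {
            "initial": "How do you do. Please tell me your problem.",
            "pre":     ["dont don't", ...],
            "post":    ["me you", "your my", ...],
            "synon":   ["cannot cant", "belief feel think believe wish", ...],
            "keywords": [
                {"token": "xnone", "weight": 0, "rules": [
                    {"decomp": "*", "reasmb": ["Please go on."]}
                ]},
                ...
            ]
        }
    */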
    function process_rules (rules) {

        function looping_iterator (l) {
            var next = function () {
                var ret = l[next.i];
                if (++next.i >= l.length) { next.i = 0; }
                return ret;
            };
            next.length = l.length;
            next.i = 0;
            next.items = l;
            return next;
        }
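
        // e.g. (hypothetical): var next = looping_iterator(["a", "b", "c"]);
        // next() returns "a", then "b", then "c", then wraps around to "a".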

        // index pre + post and tokenize results
        function _index (name) {
            var new_value = {};
            rules[name].forEach(function (x) {
                var words = tokenize(x),
                    word = words.shift();
                new_value[word] = words;
            });
            rules[name] = new_value;
        }
        _index("pre");
        _index("post");

        // index synonyms by first word
        var new_synon = {};
        rules.synon.forEach(function (x) {
            var words = tokenize(x);
            new_synon[words[0]] = words;
        });
        rules.synon = new_synon;

        // index keywords by name
        rules.keywords_by_token = {};
        rules.keywords.forEach(function (x) {
            // console.log("token", x.token);
            rules.keywords_by_token[x.token] = x;
            // wrap the rules
            x.rules.forEach(function (r) {
                // ensure list
                if (!Array.isArray(r.reasmb)) { r.reasmb = [r.reasmb]; }
                // wrap the reasmb list in a looping iterator that preserves its state
                r.reasmb = looping_iterator(r.reasmb);
            });
        });
        // console.log("keywords_by_token", rules.keywords_by_token)
    }

    // function trim (text) { return text.replace(/\s+$/, "").replace(/\s+/, ""); }

    // strip leading/trailing non-letter characters (the class includes some
    // accented letters so French scripts survive tokenization)
    function trimword (text) {
        return text
            .replace(/[^a-zA-Zéèàöùç]+$/, "")
            .replace(/^[^a-zA-Zéèàöùç]+/, "");
    }

    function tokenize (text) {
        return trimword(text).split(/\s+/).map(trimword);
    }

    // used for both pre + post subs
    function sub (tokens, subst) {
        // note: tokens.length is re-read each pass because a multi-word
        // substitution grows the array mid-loop
        for (var i = 0; i < tokens.length; i++) {
            var replacement = subst[tokens[i].toLowerCase()];
            if (replacement) {
                if (Array.isArray(replacement)) {
                    Array.prototype.splice.apply(tokens, [i, 1].concat(replacement));
                    i += (replacement.length - 1);
                } else {
                    tokens[i] = replacement;
                }
            }
        }
        return tokens;
    }
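
    // e.g. (assuming a pre rule "dont don't"):
    // sub(["i", "dont", "know"], rules.pre) -> ["i", "don't", "know"]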

    function select_keywords (tokens) {
        var ret = [];
        for (var i = 0, l = tokens.length; i < l; i++) {
            var w = tokens[i].toLowerCase(),
                rule = rules.keywords_by_token[w];
            if (rule) { ret.push(rule); }
        }
        if (rules.keywords_by_token.xnone && rules.keywords_by_token.xnone.weight != 0) {
            // append xnone rule pre-sort
            ret.push(rules.keywords_by_token.xnone);
        }
        ret.sort(function (a, b) { return b.weight - a.weight; });
        if (rules.keywords_by_token.xnone && rules.keywords_by_token.xnone.weight == 0) {
            // append xnone rule post-sort (ensuring it's last)
            ret.push(rules.keywords_by_token.xnone);
        }
        return ret;
    }

    function compile_pattern (pattern) {
        // compile a decomposition pattern, e.g.:
        //   * i @cannot *       ==> i(cannot|cant|...)(.+)
        //   * i* @belief *you * ==> @belief expands to (belief|feel|think|believe|wish)
        //   * i @belief i *
        var ret = pattern
            .replace(/ *\* */g, "*")    // compact spaces around stars
            .replace(/\*/g, "(.*?)")    // stars become lazy wildcard groups
            .replace(/@(\w+)/g, function (match, word) {    // g-flag: expand every @synonym
                var syn = rules.synon[word.toLowerCase()];
                if (syn) {
                    return "(" + syn.join("|") + ")";
                } else {
                    console.log("Missing @synonym", word);
                    return match;
                }
            });
        return "^" + ret + "$";
    }
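
    // e.g. (assuming a synon entry "cannot cant"):
    // compile_pattern("* i @cannot *") -> "^(.*?)i (cannot|cant)(.*?)$"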

    function match_decomp (pattern, tokens) {
        var ppat = compile_pattern(pattern);
        // console.log("compile_pattern.in", pattern);
        // console.log("compile_pattern.out", ppat);
        var re = new RegExp(ppat, "i");
        return re.exec(tokens.join(" "));
    }

    function do_post (txt) {
        var tokens = tokenize(txt);
        tokens = sub(tokens, rules.post);
        return tokens.join(" ");
    }

    function do_reasmb (reasmb, match, tokens) {
        if (Array.isArray(match)) {
            // g-flag: fill in every (n) placeholder, not just the first
            return reasmb.replace(/\((\d+)\)/g, function (m, n) {
                // console.log("replace", m, n);
                var ret = match[parseInt(n)];
                return do_post(ret); // apply POST substitutions here to matching input
            });
        } else {
            return reasmb;
        }
    }
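
    // e.g. (hypothetical): with match = ["i cannot sleep", "", "cannot", " sleep"],
    // do_reasmb("Why can't you (3) ?", match, tokens) -> "Why can't you sleep ?"
    // (assuming no post substitution applies to "sleep")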

    function apply_keywords (keywords, tokens) {
        for (var i = 0, l = keywords.length; i < l; i++) {
            var keyword = keywords[i];
            if ((keyword.token == "xnone") && (saved_statements.length > 0)) {
                if (debug) { console.log("using saved statement"); }
                return saved_statements.shift();
            }
            // redirected re-runs the rule loop when a goto switches keywords;
            // if no rule matches, control falls through to the next keyword
            var redirected = true;
            while (redirected) {
                redirected = false;
                if (debug) { console.log("trying keyword", keyword.token); }
                for (var ri = 0, rl = keyword.rules.length; ri < rl; ri++) {
                    var rule = keyword.rules[ri];
                    if (debug) { console.log("trying rule", rule.decomp, "(" + (ri + 1) + "/" + rl + ")"); }
                    var match = match_decomp(rule.decomp, tokens);
                    if (match) {
                        var ra = rule.reasmb();
                        if (rule.save) {
                            // memory rule: stash a response for a later xnone turn
                            var save = do_reasmb(ra, match, tokens);
                            if (debug) { console.log("save", save); }
                            saved_statements.push(save);
                        } else if (ra.indexOf("goto ") == 0) {
                            var goto_name = ra.substr(5);
                            if (debug) { console.log("goto", goto_name); }
                            keyword = rules.keywords_by_token[goto_name];
                            if (!keyword) { break; }    // unknown goto target: give up on this keyword
                            redirected = true;
                            break;  // restart over the redirected keyword's rules
                        } else {
                            if (debug) { console.log("match", match, ra); }
                            return do_reasmb(ra, match, tokens);
                        }
                    }
                }
            }
        }
    }

    function log (msg, kls) {
        var d = document.createElement("div");
        d.setAttribute("class", "msg " + kls);
        d.textContent = msg;    // textContent: don't interpret user text as HTML
        display.appendChild(d);
    }

    function say (msg) {
        log(msg, "bot");
        if (autoscroll) {
            // scrollHeight rather than the Firefox-only scrollTopMax;
            // the browser clamps scrollTop to the maximum
            display.scrollTop = display.scrollHeight;
        }
    }

    function process (text) {
        if (debug) { console.log("input", text); }
        var tokens = tokenize(text);
        if (debug) { console.log("tokens", tokens); }
        tokens = sub(tokens, rules.pre);
        if (debug) { console.log("pre", tokens); }
        var keywords = select_keywords(tokens);
        if (debug) { console.log("keywords", keywords.map(function (x) { return x.token; })); }
        var output = apply_keywords(keywords, tokens);
        if (debug) { console.log("output", output); }
        say(output);
    }
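
    // An illustrative turn (hypothetical rules): "i cannot sleep" tokenizes to
    // ["i", "cannot", "sleep"], pre substitutions apply, select_keywords ranks
    // any keywords found (plus xnone as fallback), and apply_keywords produces
    // a reassembled reply such as "Why can't you sleep ?".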

    process_rules(rules);

    textarea.addEventListener("keypress", function (event) {
        if (event.key == "Enter") {
            event.preventDefault();    // keep the newline out of the cleared textarea
            var text = textarea.value;
            log(text, "user");
            process(text);
            textarea.value = "";
            textarea.focus();
        }
    });

    say(rules.initial);
}