# Natural Language Toolkit: Models for first-order languages with lambda # # Copyright (C) 2001-2020 NLTK Project # Author: Ewan Klein , # URL: # For license information, see LICENSE.TXT # TODO: # - fix tracing # - fix iterator-based approach to existentials """ This module provides data structures for representing first-order models. """ from pprint import pformat import inspect import textwrap import re import sys from nltk.decorators import decorator # this used in code that is commented out from nltk.sem.logic import ( AbstractVariableExpression, AllExpression, Expression, AndExpression, ApplicationExpression, EqualityExpression, ExistsExpression, IffExpression, ImpExpression, IndividualVariableExpression, LambdaExpression, NegatedExpression, OrExpression, Variable, is_indvar, ) class Error(Exception): pass class Undefined(Error): pass def trace(f, *args, **kw): argspec = inspect.getfullargspec(f) d = dict(zip(argspec[0], args)) if d.pop("trace", None): print() for item in d.items(): print("%s => %s" % item) return f(*args, **kw) def is_rel(s): """ Check whether a set represents a relation (of any arity). :param s: a set containing tuples of str elements :type s: set :rtype: bool """ # we have the empty relation, i.e. set() if len(s) == 0: return True # all the elements are tuples of the same length elif all(isinstance(el, tuple) for el in s) and len(max(s)) == len(min(s)): return True else: raise ValueError("Set %r contains sequences of different lengths" % s) def set2rel(s): """ Convert a set containing individuals (strings or numbers) into a set of unary tuples. Any tuples of strings already in the set are passed through unchanged. For example: - set(['a', 'b']) => set([('a',), ('b',)]) - set([3, 27]) => set([('3',), ('27',)]) :type s: set :rtype: set of tuple of str """ new = set() for elem in s: if isinstance(elem, str): new.add((elem,)) elif isinstance(elem, int): new.add((str(elem))) else: new.add(elem) return new def arity(rel): """ Check the arity of a relation. :type rel: set of tuples :rtype: int of tuple of str """ if len(rel) == 0: return 0 return len(list(rel)[0]) class Valuation(dict): """ A dictionary which represents a model-theoretic Valuation of non-logical constants. Keys are strings representing the constants to be interpreted, and values correspond to individuals (represented as strings) and n-ary relations (represented as sets of tuples of strings). An instance of ``Valuation`` will raise a KeyError exception (i.e., just behave like a standard dictionary) if indexed with an expression that is not in its list of symbols. """ def __init__(self, xs): """ :param xs: a list of (symbol, value) pairs. """ super(Valuation, self).__init__() for (sym, val) in xs: if isinstance(val, str) or isinstance(val, bool): self[sym] = val elif isinstance(val, set): self[sym] = set2rel(val) else: msg = textwrap.fill( "Error in initializing Valuation. " "Unrecognized value for symbol '%s':\n%s" % (sym, val), width=66, ) raise ValueError(msg) def __getitem__(self, key): if key in self: return dict.__getitem__(self, key) else: raise Undefined("Unknown expression: '%s'" % key) def __str__(self): return pformat(self) @property def domain(self): """Set-theoretic domain of the value-space of a Valuation.""" dom = [] for val in self.values(): if isinstance(val, str): dom.append(val) elif not isinstance(val, bool): dom.extend( [elem for tuple_ in val for elem in tuple_ if elem is not None] ) return set(dom) @property def symbols(self): """The non-logical constants which the Valuation recognizes.""" return sorted(self.keys()) @classmethod def fromstring(cls, s): return read_valuation(s) ########################################## # REs used by the _read_valuation function ########################################## _VAL_SPLIT_RE = re.compile(r"\s*=+>\s*") _ELEMENT_SPLIT_RE = re.compile(r"\s*,\s*") _TUPLES_RE = re.compile( r"""\s* (\([^)]+\)) # tuple-expression \s*""", re.VERBOSE, ) def _read_valuation_line(s): """ Read a line in a valuation file. Lines are expected to be of the form:: noosa => n girl => {g1, g2} chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)} :param s: input line :type s: str :return: a pair (symbol, value) :rtype: tuple """ pieces = _VAL_SPLIT_RE.split(s) symbol = pieces[0] value = pieces[1] # check whether the value is meant to be a set if value.startswith("{"): value = value[1:-1] tuple_strings = _TUPLES_RE.findall(value) # are the set elements tuples? if tuple_strings: set_elements = [] for ts in tuple_strings: ts = ts[1:-1] element = tuple(_ELEMENT_SPLIT_RE.split(ts)) set_elements.append(element) else: set_elements = _ELEMENT_SPLIT_RE.split(value) value = set(set_elements) return symbol, value def read_valuation(s, encoding=None): """ Convert a valuation string into a valuation. :param s: a valuation string :type s: str :param encoding: the encoding of the input string, if it is binary :type encoding: str :return: a ``nltk.sem`` valuation :rtype: Valuation """ if encoding is not None: s = s.decode(encoding) statements = [] for linenum, line in enumerate(s.splitlines()): line = line.strip() if line.startswith("#") or line == "": continue try: statements.append(_read_valuation_line(line)) except ValueError: raise ValueError("Unable to parse line %s: %s" % (linenum, line)) return Valuation(statements) class Assignment(dict): """ A dictionary which represents an assignment of values to variables. An assigment can only assign values from its domain. If an unknown expression *a* is passed to a model *M*\ 's interpretation function *i*, *i* will first check whether *M*\ 's valuation assigns an interpretation to *a* as a constant, and if this fails, *i* will delegate the interpretation of *a* to *g*. *g* only assigns values to individual variables (i.e., members of the class ``IndividualVariableExpression`` in the ``logic`` module. If a variable is not assigned a value by *g*, it will raise an ``Undefined`` exception. A variable *Assignment* is a mapping from individual variables to entities in the domain. Individual variables are usually indicated with the letters ``'x'``, ``'y'``, ``'w'`` and ``'z'``, optionally followed by an integer (e.g., ``'x0'``, ``'y332'``). Assignments are created using the ``Assignment`` constructor, which also takes the domain as a parameter. >>> from nltk.sem.evaluate import Assignment >>> dom = set(['u1', 'u2', 'u3', 'u4']) >>> g3 = Assignment(dom, [('x', 'u1'), ('y', 'u2')]) >>> g3 == {'x': 'u1', 'y': 'u2'} True There is also a ``print`` format for assignments which uses a notation closer to that in logic textbooks: >>> print(g3) g[u1/x][u2/y] It is also possible to update an assignment using the ``add`` method: >>> dom = set(['u1', 'u2', 'u3', 'u4']) >>> g4 = Assignment(dom) >>> g4.add('x', 'u1') {'x': 'u1'} With no arguments, ``purge()`` is equivalent to ``clear()`` on a dictionary: >>> g4.purge() >>> g4 {} :param domain: the domain of discourse :type domain: set :param assign: a list of (varname, value) associations :type assign: list """ def __init__(self, domain, assign=None): super(Assignment, self).__init__() self.domain = domain if assign: for (var, val) in assign: assert val in self.domain, "'%s' is not in the domain: %s" % ( val, self.domain, ) assert is_indvar(var), ( "Wrong format for an Individual Variable: '%s'" % var ) self[var] = val self.variant = None self._addvariant() def __getitem__(self, key): if key in self: return dict.__getitem__(self, key) else: raise Undefined("Not recognized as a variable: '%s'" % key) def copy(self): new = Assignment(self.domain) new.update(self) return new def purge(self, var=None): """ Remove one or all keys (i.e. logic variables) from an assignment, and update ``self.variant``. :param var: a Variable acting as a key for the assignment. """ if var: del self[var] else: self.clear() self._addvariant() return None def __str__(self): """ Pretty printing for assignments. {'x', 'u'} appears as 'g[u/x]' """ gstring = "g" # Deterministic output for unit testing. variant = sorted(self.variant) for (val, var) in variant: gstring += "[%s/%s]" % (val, var) return gstring def _addvariant(self): """ Create a more pretty-printable version of the assignment. """ list_ = [] for item in self.items(): pair = (item[1], item[0]) list_.append(pair) self.variant = list_ return None def add(self, var, val): """ Add a new variable-value pair to the assignment, and update ``self.variant``. """ assert val in self.domain, "%s is not in the domain %s" % (val, self.domain) assert is_indvar(var), "Wrong format for an Individual Variable: '%s'" % var self[var] = val self._addvariant() return self class Model(object): """ A first order model is a domain *D* of discourse and a valuation *V*. A domain *D* is a set, and a valuation *V* is a map that associates expressions with values in the model. The domain of *V* should be a subset of *D*. Construct a new ``Model``. :type domain: set :param domain: A set of entities representing the domain of discourse of the model. :type valuation: Valuation :param valuation: the valuation of the model. :param prop: If this is set, then we are building a propositional\ model and don't require the domain of *V* to be subset of *D*. """ def __init__(self, domain, valuation): assert isinstance(domain, set) self.domain = domain self.valuation = valuation if not domain.issuperset(valuation.domain): raise Error( "The valuation domain, %s, must be a subset of the model's domain, %s" % (valuation.domain, domain) ) def __repr__(self): return "(%r, %r)" % (self.domain, self.valuation) def __str__(self): return "Domain = %s,\nValuation = \n%s" % (self.domain, self.valuation) def evaluate(self, expr, g, trace=None): """ Read input expressions, and provide a handler for ``satisfy`` that blocks further propagation of the ``Undefined`` error. :param expr: An ``Expression`` of ``logic``. :type g: Assignment :param g: an assignment to individual variables. :rtype: bool or 'Undefined' """ try: parsed = Expression.fromstring(expr) value = self.satisfy(parsed, g, trace=trace) if trace: print() print("'%s' evaluates to %s under M, %s" % (expr, value, g)) return value except Undefined: if trace: print() print("'%s' is undefined under M, %s" % (expr, g)) return "Undefined" def satisfy(self, parsed, g, trace=None): """ Recursive interpretation function for a formula of first-order logic. Raises an ``Undefined`` error when ``parsed`` is an atomic string but is not a symbol or an individual variable. :return: Returns a truth value or ``Undefined`` if ``parsed`` is\ complex, and calls the interpretation function ``i`` if ``parsed``\ is atomic. :param parsed: An expression of ``logic``. :type g: Assignment :param g: an assignment to individual variables. """ if isinstance(parsed, ApplicationExpression): function, arguments = parsed.uncurry() if isinstance(function, AbstractVariableExpression): # It's a predicate expression ("P(x,y)"), so used uncurried arguments funval = self.satisfy(function, g) argvals = tuple(self.satisfy(arg, g) for arg in arguments) return argvals in funval else: # It must be a lambda expression, so use curried form funval = self.satisfy(parsed.function, g) argval = self.satisfy(parsed.argument, g) return funval[argval] elif isinstance(parsed, NegatedExpression): return not self.satisfy(parsed.term, g) elif isinstance(parsed, AndExpression): return self.satisfy(parsed.first, g) and self.satisfy(parsed.second, g) elif isinstance(parsed, OrExpression): return self.satisfy(parsed.first, g) or self.satisfy(parsed.second, g) elif isinstance(parsed, ImpExpression): return (not self.satisfy(parsed.first, g)) or self.satisfy(parsed.second, g) elif isinstance(parsed, IffExpression): return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g) elif isinstance(parsed, EqualityExpression): return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g) elif isinstance(parsed, AllExpression): new_g = g.copy() for u in self.domain: new_g.add(parsed.variable.name, u) if not self.satisfy(parsed.term, new_g): return False return True elif isinstance(parsed, ExistsExpression): new_g = g.copy() for u in self.domain: new_g.add(parsed.variable.name, u) if self.satisfy(parsed.term, new_g): return True return False elif isinstance(parsed, LambdaExpression): cf = {} var = parsed.variable.name for u in self.domain: val = self.satisfy(parsed.term, g.add(var, u)) # NB the dict would be a lot smaller if we do this: # if val: cf[u] = val # But then need to deal with cases where f(a) should yield # a function rather than just False. cf[u] = val return cf else: return self.i(parsed, g, trace) # @decorator(trace_eval) def i(self, parsed, g, trace=False): """ An interpretation function. Assuming that ``parsed`` is atomic: - if ``parsed`` is a non-logical constant, calls the valuation *V* - else if ``parsed`` is an individual variable, calls assignment *g* - else returns ``Undefined``. :param parsed: an ``Expression`` of ``logic``. :type g: Assignment :param g: an assignment to individual variables. :return: a semantic value """ # If parsed is a propositional letter 'p', 'q', etc, it could be in valuation.symbols # and also be an IndividualVariableExpression. We want to catch this first case. # So there is a procedural consequence to the ordering of clauses here: if parsed.variable.name in self.valuation.symbols: return self.valuation[parsed.variable.name] elif isinstance(parsed, IndividualVariableExpression): return g[parsed.variable.name] else: raise Undefined("Can't find a value for %s" % parsed) def satisfiers(self, parsed, varex, g, trace=None, nesting=0): """ Generate the entities from the model's domain that satisfy an open formula. :param parsed: an open formula :type parsed: Expression :param varex: the relevant free individual variable in ``parsed``. :type varex: VariableExpression or str :param g: a variable assignment :type g: Assignment :return: a set of the entities that satisfy ``parsed``. """ spacer = " " indent = spacer + (spacer * nesting) candidates = [] if isinstance(varex, str): var = Variable(varex) else: var = varex if var in parsed.free(): if trace: print() print( (spacer * nesting) + "Open formula is '%s' with assignment %s" % (parsed, g) ) for u in self.domain: new_g = g.copy() new_g.add(var.name, u) if trace and trace > 1: lowtrace = trace - 1 else: lowtrace = 0 value = self.satisfy(parsed, new_g, lowtrace) if trace: print(indent + "(trying assignment %s)" % new_g) # parsed == False under g[u/var]? if value == False: if trace: print( indent + "value of '%s' under %s is False" % (parsed, new_g) ) # so g[u/var] is a satisfying assignment else: candidates.append(u) if trace: print( indent + "value of '%s' under %s is %s" % (parsed, new_g, value) ) result = set(c for c in candidates) # var isn't free in parsed else: raise Undefined("%s is not free in %s" % (var.name, parsed)) return result # ////////////////////////////////////////////////////////////////////// # Demo.. # ////////////////////////////////////////////////////////////////////// # number of spacer chars mult = 30 # Demo 1: Propositional Logic ################# def propdemo(trace=None): """Example of a propositional model.""" global val1, dom1, m1, g1 val1 = Valuation([("P", True), ("Q", True), ("R", False)]) dom1 = set([]) m1 = Model(dom1, val1) g1 = Assignment(dom1) print() print("*" * mult) print("Propositional Formulas Demo") print("*" * mult) print("(Propositional constants treated as nullary predicates)") print() print("Model m1:\n", m1) print("*" * mult) sentences = [ "(P & Q)", "(P & R)", "- P", "- R", "- - P", "- (P & R)", "(P | R)", "(R | P)", "(R | R)", "(- P | R)", "(P | - P)", "(P -> Q)", "(P -> R)", "(R -> P)", "(P <-> P)", "(R <-> R)", "(P <-> R)", ] for sent in sentences: if trace: print() m1.evaluate(sent, g1, trace) else: print("The value of '%s' is: %s" % (sent, m1.evaluate(sent, g1))) # Demo 2: FOL Model ############# def folmodel(quiet=False, trace=None): """Example of a first-order model.""" global val2, v2, dom2, m2, g2 v2 = [ ("adam", "b1"), ("betty", "g1"), ("fido", "d1"), ("girl", set(["g1", "g2"])), ("boy", set(["b1", "b2"])), ("dog", set(["d1"])), ("love", set([("b1", "g1"), ("b2", "g2"), ("g1", "b1"), ("g2", "b1")])), ] val2 = Valuation(v2) dom2 = val2.domain m2 = Model(dom2, val2) g2 = Assignment(dom2, [("x", "b1"), ("y", "g2")]) if not quiet: print() print("*" * mult) print("Models Demo") print("*" * mult) print("Model m2:\n", "-" * 14, "\n", m2) print("Variable assignment = ", g2) exprs = ["adam", "boy", "love", "walks", "x", "y", "z"] parsed_exprs = [Expression.fromstring(e) for e in exprs] print() for parsed in parsed_exprs: try: print( "The interpretation of '%s' in m2 is %s" % (parsed, m2.i(parsed, g2)) ) except Undefined: print("The interpretation of '%s' in m2 is Undefined" % parsed) applications = [ ("boy", ("adam")), ("walks", ("adam",)), ("love", ("adam", "y")), ("love", ("y", "adam")), ] for (fun, args) in applications: try: funval = m2.i(Expression.fromstring(fun), g2) argsval = tuple(m2.i(Expression.fromstring(arg), g2) for arg in args) print("%s(%s) evaluates to %s" % (fun, args, argsval in funval)) except Undefined: print("%s(%s) evaluates to Undefined" % (fun, args)) # Demo 3: FOL ######### def foldemo(trace=None): """ Interpretation of closed expressions in a first-order model. """ folmodel(quiet=True) print() print("*" * mult) print("FOL Formulas Demo") print("*" * mult) formulas = [ "love (adam, betty)", "(adam = mia)", "\\x. (boy(x) | girl(x))", "\\x. boy(x)(adam)", "\\x y. love(x, y)", "\\x y. love(x, y)(adam)(betty)", "\\x y. love(x, y)(adam, betty)", "\\x y. (boy(x) & love(x, y))", "\\x. exists y. (boy(x) & love(x, y))", "exists z1. boy(z1)", "exists x. (boy(x) & -(x = adam))", "exists x. (boy(x) & all y. love(y, x))", "all x. (boy(x) | girl(x))", "all x. (girl(x) -> exists y. boy(y) & love(x, y))", # Every girl loves exists boy. "exists x. (boy(x) & all y. (girl(y) -> love(y, x)))", # There is exists boy that every girl loves. "exists x. (boy(x) & all y. (girl(y) -> love(x, y)))", # exists boy loves every girl. "all x. (dog(x) -> - girl(x))", "exists x. exists y. (love(x, y) & love(x, y))", ] for fmla in formulas: g2.purge() if trace: m2.evaluate(fmla, g2, trace) else: print("The value of '%s' is: %s" % (fmla, m2.evaluate(fmla, g2))) # Demo 3: Satisfaction ############# def satdemo(trace=None): """Satisfiers of an open formula in a first order model.""" print() print("*" * mult) print("Satisfiers Demo") print("*" * mult) folmodel(quiet=True) formulas = [ "boy(x)", "(x = x)", "(boy(x) | girl(x))", "(boy(x) & girl(x))", "love(adam, x)", "love(x, adam)", "-(x = adam)", "exists z22. love(x, z22)", "exists y. love(y, x)", "all y. (girl(y) -> love(x, y))", "all y. (girl(y) -> love(y, x))", "all y. (girl(y) -> (boy(x) & love(y, x)))", "(boy(x) & all y. (girl(y) -> love(x, y)))", "(boy(x) & all y. (girl(y) -> love(y, x)))", "(boy(x) & exists y. (girl(y) & love(y, x)))", "(girl(x) -> dog(x))", "all y. (dog(y) -> (x = y))", "exists y. love(y, x)", "exists y. (love(adam, y) & love(y, x))", ] if trace: print(m2) for fmla in formulas: print(fmla) Expression.fromstring(fmla) parsed = [Expression.fromstring(fmla) for fmla in formulas] for p in parsed: g2.purge() print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, "x", g2, trace))) def demo(num=0, trace=None): """ Run exists demos. - num = 1: propositional logic demo - num = 2: first order model demo (only if trace is set) - num = 3: first order sentences demo - num = 4: satisfaction of open formulas demo - any other value: run all the demos :param trace: trace = 1, or trace = 2 for more verbose tracing """ demos = {1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo} try: demos[num](trace=trace) except KeyError: for num in demos: demos[num](trace=trace) if __name__ == "__main__": demo(2, trace=0)