|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# Natural Language Toolkit: Chart Parser for Feature-Based Grammars
|
|
|
|
#
|
|
|
|
# Copyright (C) 2001-2020 NLTK Project
|
|
|
|
# Author: Rob Speer <rspeer@mit.edu>
|
|
|
|
# Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
|
|
|
|
# URL: <http://nltk.org/>
|
|
|
|
# For license information, see LICENSE.TXT
|
|
|
|
|
|
|
|
"""
|
|
|
|
Extension of chart parsing implementation to handle grammars with
|
|
|
|
feature structures as nodes.
|
|
|
|
"""
|
|
|
|
from time import perf_counter
|
|
|
|
|
|
|
|
from nltk.featstruct import FeatStruct, unify, TYPE, find_variables
|
|
|
|
from nltk.sem import logic
|
|
|
|
from nltk.tree import Tree
|
|
|
|
from nltk.grammar import (
|
|
|
|
Nonterminal,
|
|
|
|
Production,
|
|
|
|
CFG,
|
|
|
|
FeatStructNonterminal,
|
|
|
|
is_nonterminal,
|
|
|
|
is_terminal,
|
|
|
|
)
|
|
|
|
from nltk.parse.chart import (
|
|
|
|
TreeEdge,
|
|
|
|
Chart,
|
|
|
|
ChartParser,
|
|
|
|
EdgeI,
|
|
|
|
FundamentalRule,
|
|
|
|
LeafInitRule,
|
|
|
|
EmptyPredictRule,
|
|
|
|
BottomUpPredictRule,
|
|
|
|
SingleEdgeFundamentalRule,
|
|
|
|
BottomUpPredictCombineRule,
|
|
|
|
CachedTopDownPredictRule,
|
|
|
|
TopDownInitRule,
|
|
|
|
)
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Tree Edge
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureTreeEdge(TreeEdge):
    """
    A tree edge that also carries variable bindings shared between the
    nonterminals of its left-hand side and right-hand side.

    Each ``FeatureTreeEdge`` holds a ``bindings`` dictionary that maps
    variables to values.  For an incomplete edge the dictionary is just
    stored.  For a complete edge the constructor substitutes the
    bindings into every ``FeatStructNonterminal`` of the edge and then
    drops them.
    """

    def __init__(self, span, lhs, rhs, dot=0, bindings=None):
        """
        Build a new edge.

        When ``dot == len(rhs)`` (complete edge) and ``bindings`` is
        non-empty, the bindings are applied to ``lhs`` and to each
        element of ``rhs`` and the stored bindings become empty.
        Otherwise the bindings are stored unchanged.  ``span``, ``lhs``,
        ``rhs`` and ``dot`` are forwarded to ``TreeEdge``.

        :param bindings: Variable bindings for the edge; ``None`` is
            treated as an empty dictionary.
        :type bindings: dict
        """
        if bindings is None:
            bindings = {}

        # A complete edge gets its bindings substituted in and then
        # forgotten; otherwise two complete edges could compare unequal
        # only because of bindings that have already been applied.
        complete = dot == len(rhs)
        if complete and bindings:
            lhs = self._bind(lhs, bindings)
            rhs = [self._bind(symbol, bindings) for symbol in rhs]
            bindings = {}

        super().__init__(span, lhs, rhs, dot)
        self._bindings = bindings
        # Extend the comparison key set up by the base initializer with
        # the (sorted) bindings so that they take part in comparisons.
        binding_key = tuple(sorted(bindings.items()))
        self._comparison_key = (self._comparison_key, binding_key)

    @staticmethod
    def from_production(production, index):
        """
        :return: A new ``FeatureTreeEdge`` formed from the given
            production.  Its left-hand side and right-hand side are
            taken from ``production``, its span is ``(index, index)``
            and its dot position is ``0``.
        :rtype: FeatureTreeEdge
        """
        return FeatureTreeEdge(
            span=(index, index),
            lhs=production.lhs(),
            rhs=production.rhs(),
            dot=0,
        )

    def move_dot_forward(self, new_end, bindings=None):
        """
        :return: A new ``FeatureTreeEdge`` derived from this one, with
            the dot advanced by ``1`` and the end index replaced by
            ``new_end``.
        :rtype: FeatureTreeEdge
        :param new_end: The new end index.
        :type new_end: int
        :param bindings: Bindings for the new edge.
        :type bindings: dict
        """
        start = self._span[0]
        return FeatureTreeEdge(
            span=(start, new_end),
            lhs=self._lhs,
            rhs=self._rhs,
            dot=self._dot + 1,
            bindings=bindings,
        )

    def _bind(self, nt, bindings):
        """
        Return ``nt`` with ``bindings`` substituted in when it is a
        ``FeatStructNonterminal``; any other symbol is returned as-is.
        """
        if isinstance(nt, FeatStructNonterminal):
            return nt.substitute_bindings(bindings)
        return nt

    def next_with_bindings(self):
        """
        Return the symbol after the dot with this edge's bindings
        substituted into it.
        """
        upcoming = self.nextsym()
        return self._bind(upcoming, self._bindings)

    def bindings(self):
        """
        Return a copy of this edge's bindings dictionary.
        """
        return self._bindings.copy()

    def variables(self):
        """
        :return: The set of variables used by this edge.
        :rtype: set(Variable)
        """
        # Search the lhs, every rhs element, and both sides of the
        # bindings dictionary.
        searched = [
            self._lhs,
            *self._rhs,
            *self._bindings.keys(),
            *self._bindings.values(),
        ]
        return find_variables(searched, fs_class=FeatStruct)

    def __str__(self):
        """
        Return the ``TreeEdge`` string form; for an incomplete edge the
        bindings are appended as ``{var: value, ...}``.
        """
        base = super().__str__()
        if self.is_complete():
            return base
        pairs = sorted(self._bindings.items())
        bindings = "{%s}" % ", ".join("%s: %r" % item for item in pairs)
        return "%s %s" % (base, bindings)
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# A specialized Chart for feature grammars
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
# TODO: subsumes check when adding new edges
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureChart(Chart):
    """
    A Chart for feature grammars.

    Index keys are built from the ``TYPE`` feature of a value whenever
    the value has one (see ``_get_type_if_possible``).

    :see: ``Chart`` for more information.
    """

    def select(self, **restrictions):
        """
        Return an iterator over the edges in this chart that satisfy
        ``restrictions``.  With no restrictions, all edges are
        returned.  See ``Chart.select`` for more information about the
        ``restrictions`` on the edges.
        """
        # No restrictions: every edge qualifies.
        if not restrictions:
            return iter(self._edges)

        # The index is identified by the sorted restriction names;
        # build it lazily on first use.
        restr_keys = tuple(sorted(restrictions))
        if restr_keys not in self._indexes:
            self._add_index(restr_keys)

        normalize = self._get_type_if_possible
        vals = tuple(normalize(restrictions[key]) for key in restr_keys)
        matching = self._indexes[restr_keys].get(vals, [])
        return iter(matching)

    def _add_index(self, restr_keys):
        """
        A helper function for ``select``, which creates a new index for
        a given set of attributes (aka restriction keys).

        :raise ValueError: If a key is not an attribute of ``EdgeI``.
        """
        # Reject keys that do not name an ``EdgeI`` attribute.
        for key in restr_keys:
            if not hasattr(EdgeI, key):
                raise ValueError("Bad restriction: %s" % key)

        # Register the (still empty) index.
        index = {}
        self._indexes[restr_keys] = index

        # File every edge already in the chart under its key tuple.
        normalize = self._get_type_if_possible
        for edge in self._edges:
            vals = tuple(normalize(getattr(edge, key)()) for key in restr_keys)
            index.setdefault(vals, []).append(edge)

    def _register_with_indexes(self, edge):
        """
        A helper function for ``insert``, which registers the new
        edge with all existing indexes.
        """
        normalize = self._get_type_if_possible
        for restr_keys, index in self._indexes.items():
            vals = tuple(normalize(getattr(edge, key)()) for key in restr_keys)
            index.setdefault(vals, []).append(edge)

    def _get_type_if_possible(self, item):
        """
        Helper function which returns the ``TYPE`` feature of ``item``
        when ``item`` is a dict containing ``TYPE``; otherwise it
        returns ``item`` itself.
        """
        has_type = isinstance(item, dict) and TYPE in item
        return item[TYPE] if has_type else item

    def parses(self, start, tree_class=Tree):
        """
        Generate the trees of every ``FeatureTreeEdge`` that spans the
        whole input, has the same ``TYPE`` as ``start``, and whose lhs
        unifies with ``start``.
        """
        for edge in self.select(start=0, end=self._num_leaves):
            if not isinstance(edge, FeatureTreeEdge):
                continue
            if edge.lhs()[TYPE] != start[TYPE]:
                continue
            if not unify(edge.lhs(), start, rename_vars=True):
                continue
            yield from self.trees(edge, complete=True, tree_class=tree_class)
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Fundamental Rule
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureFundamentalRule(FundamentalRule):
    r"""
    A specialized version of the fundamental rule that operates on
    nonterminals whose symbols are ``FeatStructNonterminal``s.  Rather
    than simply comparing the nonterminals for equality, they are
    unified.  Variable bindings from these unifications are collected
    and stored in the chart using a ``FeatureTreeEdge``.  When a
    complete edge is generated, these bindings are applied to all
    nonterminals in the edge.

    The fundamental rule states that:

    - ``[A -> alpha \* B1 beta][i:j]``
    - ``[B2 -> gamma \*][j:k]``

    licenses the edge:

    - ``[A -> alpha B3 \* beta][i:k]``

    assuming that B1 and B2 can be unified to generate B3.
    """

    def apply(self, chart, grammar, left_edge, right_edge):
        """
        Combine an incomplete ``left_edge`` with an adjacent complete
        ``right_edge`` and yield the new edge if the chart accepts it.

        Nothing is yielded unless ``left_edge`` is an incomplete
        ``FeatureTreeEdge`` ending where the complete ``right_edge``
        starts.  When ``right_edge`` is a ``FeatureTreeEdge`` the next
        symbol of ``left_edge`` is unified with the lhs of
        ``right_edge``; otherwise the two are compared for equality.
        """
        # Make sure the rule is applicable.
        if not (
            left_edge.end() == right_edge.start()
            and left_edge.is_incomplete()
            and right_edge.is_complete()
            and isinstance(left_edge, FeatureTreeEdge)
        ):
            return

        found = right_edge.lhs()
        nextsym = left_edge.nextsym()
        if isinstance(right_edge, FeatureTreeEdge):
            if not is_nonterminal(nextsym):
                return
            # Cheap pre-check before unification: the TYPE features
            # must be equal.
            if nextsym[TYPE] != found[TYPE]:
                return
            # Work on a copy of the bindings; unify() receives this
            # dictionary and the result is stored on the new edge.
            bindings = left_edge.bindings()
            # We rename vars here, because we don't want variables
            # from the two different productions to match.
            found = found.rename_variables(used_vars=left_edge.variables())
            # Unify B1 (left_edge.nextsym) with B2 (right_edge.lhs) to
            # generate B3 (result).
            result = unify(nextsym, found, bindings, rename_vars=False)
            if result is None:
                return
        else:
            # right_edge is not a FeatureTreeEdge: plain equality test.
            if nextsym != found:
                return
            # Create a copy of the bindings.
            bindings = left_edge.bindings()

        # Construct the new edge.
        new_edge = left_edge.move_dot_forward(right_edge.end(), bindings)

        # Add it to the chart, with appropriate child pointers.
        if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
            yield new_edge
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
    """
    A specialized version of the completer / single edge fundamental rule
    that operates on nonterminals whose symbols are
    ``FeatStructNonterminal``s.  The actual combination of two edges is
    delegated to ``FeatureFundamentalRule``, which unifies the
    nonterminals rather than comparing them for equality.
    """

    _fundamental_rule = FeatureFundamentalRule()

    def _apply_complete(self, chart, grammar, right_edge):
        """
        Pair the complete ``right_edge`` with each incomplete chart
        edge that ends where it starts and expects its lhs next.
        """
        combine = self._fundamental_rule.apply
        candidates = chart.select(
            end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs()
        )
        for left_edge in candidates:
            yield from combine(chart, grammar, left_edge, right_edge)

    def _apply_incomplete(self, chart, grammar, left_edge):
        """
        Pair the incomplete ``left_edge`` with each complete chart edge
        that starts where it ends and whose lhs is its next symbol.
        """
        combine = self._fundamental_rule.apply
        candidates = chart.select(
            start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym()
        )
        for right_edge in candidates:
            yield from combine(chart, grammar, left_edge, right_edge)
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Top-Down Prediction
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureTopDownInitRule(TopDownInitRule):
    """
    Top-down initialization for feature grammars: each production whose
    lhs is the grammar's start symbol is inserted as a
    ``FeatureTreeEdge`` at position 0.
    """

    def apply(self, chart, grammar):
        """
        Yield every start-symbol edge that the chart newly accepts.
        """
        start_productions = grammar.productions(lhs=grammar.start())
        for prod in start_productions:
            new_edge = FeatureTreeEdge.from_production(prod, 0)
            inserted = chart.insert(new_edge, ())
            if inserted:
                yield new_edge
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureTopDownPredictRule(CachedTopDownPredictRule):
    r"""
    A specialized version of the (cached) top down predict rule that operates
    on nonterminals whose symbols are ``FeatStructNonterminal``s.  Rather
    than simply comparing the nonterminals for equality, they are
    unified.

    The top down expand rule states that:

    - ``[A -> alpha \* B1 beta][i:j]``

    licenses the edge:

    - ``[B2 -> \* gamma][j:j]``

    for each grammar production ``B2 -> gamma``, assuming that B1
    and B2 can be unified.
    """

    def apply(self, chart, grammar, edge):
        """
        Yield a zero-width ``FeatureTreeEdge`` at the end of ``edge``
        for each production whose lhs unifies with the next symbol of
        ``edge`` (after that edge's bindings have been applied).

        Nothing is yielded for a complete edge, for an edge whose next
        symbol is not a nonterminal, or when this rule was already
        applied for the same bound next symbol and end index with the
        same chart and grammar objects.
        """
        if edge.is_complete():
            return
        nextsym, index = edge.nextsym(), edge.end()
        if not is_nonterminal(nextsym):
            return

        # If we've already applied this rule to an edge with the same
        # next & end, and the chart & grammar have not changed, then
        # just return (no new edges to add).
        nextsym_with_bindings = edge.next_with_bindings()
        done = self._done.get((nextsym_with_bindings, index), (None, None))
        if done[0] is chart and done[1] is grammar:
            return

        for prod in grammar.productions(lhs=nextsym):
            # If the left corner in the predicted production is
            # leaf, it must match with the input.
            if prod.rhs():
                first = prod.rhs()[0]
                if is_terminal(first):
                    if index >= chart.num_leaves():
                        continue
                    if first != chart.leaf(index):
                        continue

            # We rename vars here, because we don't want variables
            # from the two different productions to match.
            if unify(prod.lhs(), nextsym_with_bindings, rename_vars=True):
                new_edge = FeatureTreeEdge.from_production(prod, index)
                if chart.insert(new_edge, ()):
                    yield new_edge

        # Record the fact that we've applied this rule.  Because this is
        # a generator, this line only runs once the caller has consumed
        # every edge yielded above.
        self._done[nextsym_with_bindings, index] = (chart, grammar)
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Bottom-Up Prediction
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureBottomUpPredictRule(BottomUpPredictRule):
    """
    Bottom-up prediction for feature grammars: for a complete edge,
    each production selected by ``grammar.productions(rhs=edge.lhs())``
    is inserted as a zero-width ``FeatureTreeEdge`` at the start of
    that edge.
    """

    def apply(self, chart, grammar, edge):
        """
        Yield every predicted edge that the chart newly accepts.
        """
        if edge.is_incomplete():
            return
        from_feature_edge = isinstance(edge, FeatureTreeEdge)
        for prod in grammar.productions(rhs=edge.lhs()):
            # For a feature edge, skip any production whose first rhs
            # symbol is not a nonterminal.
            if from_feature_edge and not is_nonterminal(prod.rhs()[0]):
                continue

            new_edge = FeatureTreeEdge.from_production(prod, edge.start())
            if chart.insert(new_edge, ()):
                yield new_edge
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureBottomUpPredictCombineRule(BottomUpPredictCombineRule):
    """
    Bottom-up left-corner prediction for feature grammars: for a
    complete edge, each matching production is predicted and its dot is
    immediately moved over the edge, using the bindings obtained by
    unifying the production's first rhs symbol with the edge's lhs.
    """

    def apply(self, chart, grammar, edge):
        """
        Yield every predicted-and-combined edge that the chart newly
        accepts, with ``edge`` recorded as its only child.
        """
        if edge.is_incomplete():
            return
        found = edge.lhs()
        from_feature_edge = isinstance(edge, FeatureTreeEdge)
        for prod in grammar.productions(rhs=found):
            bindings = {}
            if from_feature_edge:
                left_corner = prod.rhs()[0]
                if not is_nonterminal(left_corner):
                    continue

                # We rename vars here, because we don't want variables
                # from the two different productions to match.
                # NOTE: ``found`` is rebound, so the renamed value is
                # carried over into later iterations of this loop.
                prod_vars = find_variables(
                    (prod.lhs(),) + prod.rhs(), fs_class=FeatStruct
                )
                found = found.rename_variables(used_vars=prod_vars)

                # unify() fills ``bindings``; None means failure.
                if unify(left_corner, found, bindings, rename_vars=False) is None:
                    continue

            predicted = FeatureTreeEdge.from_production(prod, edge.start())
            new_edge = predicted.move_dot_forward(edge.end(), bindings)
            if chart.insert(new_edge, (edge,)):
                yield new_edge
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureEmptyPredictRule(EmptyPredictRule):
    """
    Insert a ``FeatureTreeEdge`` for each production returned by
    ``grammar.productions(empty=True)`` at every position of the chart,
    from ``0`` up to and including ``chart.num_leaves()``.
    """

    def apply(self, chart, grammar):
        """
        Yield every such edge that the chart newly accepts.
        """
        for prod in grammar.productions(empty=True):
            # num_leaves() is re-read for each production.
            positions = range(chart.num_leaves() + 1)
            for index in positions:
                new_edge = FeatureTreeEdge.from_production(prod, index)
                if chart.insert(new_edge, ()):
                    yield new_edge
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Feature Chart Parser
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
# Rule lists ("strategies") that can be passed to ``FeatureChartParser``.
# Every list gets its own rule instances.

# Top-down strategy.
TD_FEATURE_STRATEGY = [
    LeafInitRule(),
    FeatureTopDownInitRule(),
    FeatureTopDownPredictRule(),
    FeatureSingleEdgeFundamentalRule(),
]

# Bottom-up strategy.
BU_FEATURE_STRATEGY = [
    LeafInitRule(),
    FeatureEmptyPredictRule(),
    FeatureBottomUpPredictRule(),
    FeatureSingleEdgeFundamentalRule(),
]

# Bottom-up left-corner strategy (the default of ``FeatureChartParser``).
BU_LC_FEATURE_STRATEGY = [
    LeafInitRule(),
    FeatureEmptyPredictRule(),
    FeatureBottomUpPredictCombineRule(),
    FeatureSingleEdgeFundamentalRule(),
]
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureChartParser(ChartParser):
    """
    A ``ChartParser`` preconfigured for feature grammars: it defaults
    to the ``BU_LC_FEATURE_STRATEGY`` rule list and to ``FeatureChart``
    as the chart class.
    """

    def __init__(
        self,
        grammar,
        strategy=BU_LC_FEATURE_STRATEGY,
        trace_chart_width=20,
        chart_class=FeatureChart,
        **parser_args
    ):
        """
        Forward all arguments to ``ChartParser.__init__``; see
        ``ChartParser`` for their meaning.
        """
        forwarded = dict(
            strategy=strategy,
            trace_chart_width=trace_chart_width,
            chart_class=chart_class,
        )
        super().__init__(grammar, **forwarded, **parser_args)
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureTopDownChartParser(FeatureChartParser):
    """
    A ``FeatureChartParser`` that uses ``TD_FEATURE_STRATEGY``.
    """

    def __init__(self, grammar, **parser_args):
        """
        Extra keyword arguments are forwarded to ``FeatureChartParser``.
        """
        super().__init__(grammar, TD_FEATURE_STRATEGY, **parser_args)
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureBottomUpChartParser(FeatureChartParser):
    """
    A ``FeatureChartParser`` that uses ``BU_FEATURE_STRATEGY``.
    """

    def __init__(self, grammar, **parser_args):
        """
        Extra keyword arguments are forwarded to ``FeatureChartParser``.
        """
        super().__init__(grammar, BU_FEATURE_STRATEGY, **parser_args)
|
|
|
|
|
|
|
|
|
|
|
|
class FeatureBottomUpLeftCornerChartParser(FeatureChartParser):
    """
    A ``FeatureChartParser`` that uses ``BU_LC_FEATURE_STRATEGY``.
    """

    def __init__(self, grammar, **parser_args):
        """
        Extra keyword arguments are forwarded to ``FeatureChartParser``.
        """
        super().__init__(grammar, BU_LC_FEATURE_STRATEGY, **parser_args)
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Instantiate Variable Chart
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
class InstantiateVarsChart(FeatureChart):
    """
    A specialized chart that 'instantiates' variables whose names
    start with '@', by replacing them with unique new variables.
    In particular, whenever a complete edge is added to the chart, any
    variables in the edge's ``lhs`` whose names start with '@' will be
    replaced by unique new ``Variable``s.
    """

    def __init__(self, tokens):
        super().__init__(tokens)

    def initialize(self):
        """
        Reset the set of instantiated edges, then run the
        ``FeatureChart`` initialization.
        """
        self._instantiated = set()
        super().initialize()

    def insert(self, edge, child_pointer_list):
        """
        Insert ``edge`` after instantiating it.  An edge that is
        already in the instantiated set is refused, returning
        ``False``.
        """
        if edge in self._instantiated:
            return False
        self.instantiate_edge(edge)
        return super().insert(edge, child_pointer_list)

    def instantiate_edge(self, edge):
        """
        If the edge is a ``FeatureTreeEdge``, and it is complete,
        then instantiate all variables whose names start with '@',
        by replacing them with unique new variables.

        Note that instantiation is done in-place, since the
        parsing algorithms might already hold a reference to
        the edge for future use.
        """
        # Leave the edge untouched when it is not a feature tree edge,
        # is not complete, or is already in the chart.
        if (
            not isinstance(edge, FeatureTreeEdge)
            or not edge.is_complete()
            or edge in self._edge_to_cpls
        ):
            return

        # Nothing to do when the lhs has no '@' variables.
        replacements = self.inst_vars(edge)
        if not replacements:
            return

        # Instantiate the edge!  It is recorded first, then its lhs is
        # rewritten in place.
        self._instantiated.add(edge)
        edge._lhs = edge.lhs().substitute_bindings(replacements)

    def inst_vars(self, edge):
        """
        Return a dict mapping each variable of the edge's lhs whose
        name starts with '@' to a fresh ``logic.unique_variable()``.
        """
        return {
            var: logic.unique_variable()
            for var in edge.lhs().variables()
            if var.name.startswith("@")
        }
|
|
|
|
|
|
|
|
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
# Demo
|
|
|
|
# ////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
|
|
def demo_grammar():
    """
    Build and return the small feature grammar used by ``demo``.
    """
    from nltk.grammar import FeatureGrammar

    grammar_text = """
S  -> NP VP
PP -> Prep NP
NP -> NP PP
VP -> VP PP
VP -> Verb NP
VP -> Verb
NP -> Det[pl=?x] Noun[pl=?x]
NP -> "John"
NP -> "I"
Det -> "the"
Det -> "my"
Det[-pl] -> "a"
Noun[-pl] -> "dog"
Noun[-pl] -> "cookie"
Verb -> "ate"
Verb -> "saw"
Prep -> "with"
Prep -> "under"
"""
    return FeatureGrammar.fromstring(grammar_text)
|
|
|
|
|
|
|
|
|
|
|
|
def demo(
    print_times=True,
    print_grammar=True,
    print_trees=True,
    print_sentence=True,
    trace=1,
    parser=FeatureChartParser,
    sent="I saw John with a dog with my cookie",
):
    """
    Parse ``sent`` with the demo grammar and print the results.

    :param print_times: If true, print the elapsed time for building
        the parser, parsing, and collecting the trees.
    :param print_grammar: If true, print the grammar first.
    :param print_trees: If true, print every parse tree; otherwise
        print only the number of trees.
    :param print_sentence: If true, print the sentence being parsed.
    :param trace: Trace level passed to the parser constructor.
    :param parser: Parser class, called as ``parser(grammar, trace=trace)``.
    :param sent: The sentence to parse; it is split on whitespace.
    """
    print()
    grammar = demo_grammar()
    if print_grammar:
        print(grammar)
        print()
    print("*", parser.__name__)
    if print_sentence:
        print("Sentence:", sent)
    tokens = sent.split()

    # The timed region covers parser construction, chart parsing and
    # tree extraction.
    t = perf_counter()
    cp = parser(grammar, trace=trace)
    chart = cp.chart_parse(tokens)
    trees = list(chart.parses(grammar.start()))
    if print_times:
        print("Time: %s" % (perf_counter() - t))

    if print_trees:
        for tree in trees:
            print(tree)
    else:
        print("Nr trees:", len(trees))
|
|
|
|
|
|
|
|
|
|
|
|
def run_profile(filename="/tmp/profile.out"):
    """
    Profile one run of ``demo`` and print the collected statistics.

    The statistics are printed twice, 60 entries each: first sorted by
    ("time", "cum"), then by ("cum", "time").

    :param filename: File the profile data is written to and then read
        back from.
    :type filename: str
    """
    import profile
    import pstats

    # profile.run() executes its statement in the ``__main__`` namespace,
    # where ``demo`` is not defined when this module is imported rather
    # than run as a script.  runctx() with this module's globals works
    # in both cases.
    profile.runctx("for i in range(1): demo()", globals(), {}, filename)

    p = pstats.Stats(filename)
    p.strip_dirs().sort_stats("time", "cum").print_stats(60)
    p.strip_dirs().sort_stats("cum", "time").print_stats(60)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    from nltk.data import load

    # Run the built-in demo first.
    demo()
    print()

    # Then parse a short sentence with one of the book grammars,
    # tracing at level 2.
    grammar = load("grammars/book_grammars/feat0.fcfg")
    cp = FeatureChartParser(grammar, trace=2)
    sent = "Kim likes children"
    for tree in cp.parse(sent.split()):
        print(tree)
|