You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
259 lines
7.2 KiB
Python
259 lines
7.2 KiB
Python
# Natural Language Toolkit: Lexical Functional Grammar
#
# Author: Dan Garrette <dhgarrette@gmail.com>
#
# Copyright (C) 2001-2020 NLTK Project
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
|
|
|
|
from itertools import chain
|
|
|
|
from nltk.internals import Counter
|
|
|
|
|
|
class FStructure(dict):
    """
    A dict-based feature structure with case-insensitive item access.

    Keys are lower-cased by ``__setitem__``, ``__getitem__`` and
    ``__contains__``; the other inherited ``dict`` methods are not overridden.
    Values stored through ``safeappend`` are lists whose elements are nested
    ``FStructure`` objects, ``(word, tag)`` tuples, or lists.

    ``_read_depgraph`` sets three plain attributes on the instances it
    builds: ``pred`` (a ``(word, tag)`` tuple), ``label`` (a string from
    ``_make_label``) and ``parent`` (the enclosing ``FStructure`` or None).
    """

    def safeappend(self, key, item):
        """
        Append 'item' to the list at 'key'.  If no list exists for 'key', then
        construct one.
        """
        if key not in self:
            self[key] = []
        self[key].append(item)

    def __setitem__(self, key, value):
        """Store 'value' under the lower-cased form of 'key'."""
        dict.__setitem__(self, key.lower(), value)

    def __getitem__(self, key):
        """Return the value stored under the lower-cased form of 'key'."""
        return dict.__getitem__(self, key.lower())

    def __contains__(self, key):
        """Return True if the lower-cased form of 'key' is present."""
        return dict.__contains__(self, key.lower())

    def to_glueformula_list(self, glue_dict):
        """
        Convert this structure to a dependency graph and hand it to
        ``glue_dict.to_glueformula_list``, returning whatever that returns.

        :param glue_dict: object exposing ``to_glueformula_list(depgraph)``
        """
        depgraph = self.to_depgraph()
        return glue_dict.to_glueformula_list(depgraph)

    def to_depgraph(self, rel=None):
        """
        Build a ``DependencyGraph`` from this structure.

        :param rel: unused; kept so existing callers keep working
        :return: the populated graph, with ``root`` set to the node at
            address 1
        """
        from nltk.parse.dependencygraph import DependencyGraph

        depgraph = DependencyGraph()
        nodes = depgraph.nodes

        self._to_depgraph(nodes, 0, "ROOT")

        # Add all the dependencies for all the nodes
        for address, node in nodes.items():
            for n2 in (n for n in nodes.values() if n["rel"] != "TOP"):
                if n2["head"] == address:
                    node["deps"].setdefault(n2["rel"], []).append(n2["address"])

        # NOTE(review): assumes a fresh DependencyGraph already holds exactly
        # one node, so the first node written above gets address 1 -- confirm.
        depgraph.root = nodes[1]

        return depgraph

    def _to_depgraph(self, nodes, head, rel):
        """
        Recursively write this structure and its features into 'nodes'.

        Each new node takes ``len(nodes)`` as its address, so 'nodes' must
        create an entry on first access (``nodes[index].update(...)``).

        :param nodes: mapping from address to node dict, filled in place
        :param head: address of the node this structure depends on
        :param rel: relation name linking this structure to 'head'
        :raise Exception: if a feature holds something other than an
            FStructure, a tuple or a list
        """
        index = len(nodes)

        nodes[index].update(
            {
                "address": index,
                "word": self.pred[0],
                "tag": self.pred[1],
                "head": head,
                "rel": rel,
            }
        )

        for feature in sorted(self):
            values = self[feature]
            try:
                ordered = sorted(values)
            except TypeError:
                # Nested FStructures are dicts, which Python 3 cannot order;
                # keep the insertion order instead of crashing.
                ordered = list(values)

            for item in ordered:
                if isinstance(item, FStructure):
                    item._to_depgraph(nodes, index, feature)
                elif isinstance(item, tuple):
                    new_index = len(nodes)
                    nodes[new_index].update(
                        {
                            "address": new_index,
                            "word": item[0],
                            "tag": item[1],
                            "head": index,
                            "rel": feature,
                        }
                    )
                elif isinstance(item, list):
                    for n in item:
                        n._to_depgraph(nodes, index, feature)
                else:
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )

    @staticmethod
    def read_depgraph(depgraph):
        """Build an FStructure from 'depgraph', starting at ``depgraph.root``."""
        return FStructure._read_depgraph(depgraph.root, depgraph)

    @staticmethod
    def _read_depgraph(node, depgraph, label_counter=None, parent=None):
        """
        Recursively convert 'node' and its dependents.

        :param node: node dict with "rel", "word", "tag" and "deps" entries
        :param depgraph: graph whose ``nodes`` mapping resolves the addresses
            listed in ``node["deps"]``
        :param label_counter: counter whose ``get()`` supplies label numbers;
            a new ``Counter`` is created when omitted
        :param parent: the FStructure that will contain the result
        :return: a ``(word, tag)`` tuple for "spec"/"punct" relations,
            otherwise a new FStructure
        """
        if label_counter is None:
            label_counter = Counter()

        if node["rel"].lower() in ("spec", "punct"):
            # the value of a 'spec' entry is a word, not an FStructure
            return (node["word"], node["tag"])

        fstruct = FStructure()
        fstruct.pred = None
        fstruct.label = FStructure._make_label(label_counter.get())

        fstruct.parent = parent

        word, tag = node["word"], node["tag"]
        if tag[:2] == "VB":
            # A third tag character of "D" (as in "VBD") is recorded as past tense.
            if tag[2:3] == "D":
                fstruct.safeappend("tense", ("PAST", "tense"))
            fstruct.pred = (word, tag[:2])

        if not fstruct.pred:
            fstruct.pred = (word, tag)

        children = [
            depgraph.nodes[idx] for idx in chain.from_iterable(node["deps"].values())
        ]
        for child in children:
            fstruct.safeappend(
                child["rel"],
                FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
            )

        return fstruct

    @staticmethod
    def _make_label(value):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        Letters run from "f" to "z" and then "a" to "e".  The letter index
        wraps around every 26 values, and ``value // 26`` is appended as a
        numeric suffix whenever it is greater than zero.

        :param value: where to index into the list of characters
        :type value: int
        """
        letters = "fghijklmnopqrstuvwxyzabcde"
        # Wrap the index so values above 26 no longer raise IndexError.
        letter = letters[(value - 1) % len(letters)]
        num = int(value) // 26
        if num > 0:
            return letter + str(num)
        return letter

    def __repr__(self):
        """Return the pretty-printed form with all newlines removed."""
        return self.__str__().replace("\n", "")

    def __str__(self):
        """Return ``pretty_format()`` with its default indent."""
        return self.pretty_format()

    def pretty_format(self, indent=3):
        """
        Render this structure as a bracketed, multi-line string.

        The ``label`` and ``pred`` attributes are included when they have
        been set; a structure without them is rendered without a label
        prefix or ``pred`` entry.

        :param indent: number of spaces placed before each feature line
        :type indent: int
        :raise Exception: if a feature holds something other than an
            FStructure, a tuple or a list
        """
        # A missing instance attribute raises AttributeError, not NameError.
        has_label = hasattr(self, "label")
        accum = "%s:[" % self.label if has_label else "["

        pred = getattr(self, "pred", None)
        if pred is not None:
            accum += "pred '%s'" % (pred[0])

        pad = " " * indent
        for feature in sorted(self):
            for item in self[feature]:
                if isinstance(item, FStructure):
                    label_width = len(self.label) if has_label else 0
                    next_indent = indent + len(feature) + 3 + label_width
                    accum += "\n%s%s %s" % (
                        pad,
                        feature,
                        item.pretty_format(next_indent),
                    )
                elif isinstance(item, tuple):
                    accum += "\n%s%s '%s'" % (pad, feature, item[0])
                elif isinstance(item, list):
                    separator = "\n%s" % (" " * (indent + len(feature) + 2))
                    # str() leaves strings unchanged and lets other elements print.
                    accum += "\n%s%s {%s}" % (
                        pad,
                        feature,
                        separator.join(str(element) for element in item),
                    )
                else:  # ERROR
                    raise Exception(
                        "feature %s is not an FStruct, a list, or a tuple" % feature
                    )
        return accum + "]"
|
|
|
|
|
|
def demo_read_depgraph():
    """Build four sample dependency graphs and print the FStructure read from each."""
    from nltk.parse.dependencygraph import DependencyGraph

    # One graph description per entry; each row is "word tag head relation".
    descriptions = (
        """\
Esso NNP 2 SUB
said VBD 0 ROOT
the DT 5 NMOD
Whiting NNP 5 NMOD
field NN 6 SUB
started VBD 2 VMOD
production NN 6 OBJ
Tuesday NNP 6 VMOD
""",
        """\
John NNP 2 SUB
sees VBP 0 ROOT
Mary NNP 2 OBJ
""",
        """\
a DT 2 SPEC
man NN 3 SUBJ
walks VB 0 ROOT
""",
        """\
every DT 2 SPEC
girl NN 3 SUBJ
chases VB 0 ROOT
a DT 5 SPEC
dog NN 3 OBJ
""",
    )

    # Construct every graph before printing anything, as the demo always has.
    depgraphs = [DependencyGraph(text) for text in descriptions]
    for graph in depgraphs:
        print(FStructure.read_depgraph(graph))
|
|
|
|
|
|
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    demo_read_depgraph()
|