You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
262 lines
7.4 KiB
Python
262 lines
7.4 KiB
Python
5 years ago
|
# Natural Language Toolkit: Lexical Functional Grammar
|
||
|
#
|
||
|
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||
|
#
|
||
|
# Copyright (C) 2001-2019 NLTK Project
|
||
|
# URL: <http://nltk.org/>
|
||
|
# For license information, see LICENSE.TXT
|
||
|
from __future__ import print_function, division, unicode_literals
|
||
|
|
||
|
from itertools import chain
|
||
|
|
||
|
from nltk.internals import Counter
|
||
|
from nltk.compat import python_2_unicode_compatible
|
||
|
|
||
|
|
||
|
@python_2_unicode_compatible
|
||
|
class FStructure(dict):
    """
    A Lexical Functional Grammar f-structure, stored as a dictionary that
    maps feature names to lists of values.

    Feature names are case-insensitive: item assignment, item lookup and
    the ``in`` operator all lower-case the key before using it.  Each entry
    in a feature's list is a ``(word, tag)`` tuple, a nested ``FStructure``
    or a list of ``FStructure`` objects.

    The attributes ``pred``, ``label`` and ``parent`` are not created by the
    constructor; ``_read_depgraph`` assigns them on the structures it builds.
    """

    def safeappend(self, key, item):
        """
        Append 'item' to the list at 'key'.  If no list exists for 'key', then
        construct one.

        :param key: the (case-insensitive) feature name
        :type key: str
        :param item: the value to append to the feature's list
        """
        if key not in self:
            self[key] = []
        self[key].append(item)

    def __setitem__(self, key, value):
        """Store 'value' under the lower-cased form of 'key'."""
        dict.__setitem__(self, key.lower(), value)

    def __getitem__(self, key):
        """Return the value stored under the lower-cased form of 'key'."""
        return dict.__getitem__(self, key.lower())

    def __contains__(self, key):
        """Return True if the lower-cased form of 'key' is a stored feature."""
        return dict.__contains__(self, key.lower())

    def to_glueformula_list(self, glue_dict):
        """
        Convert this structure to a dependency graph and hand it to
        ``glue_dict.to_glueformula_list``.

        :param glue_dict: an object providing ``to_glueformula_list(depgraph)``
        :return: whatever ``glue_dict.to_glueformula_list`` returns
        """
        depgraph = self.to_depgraph()
        return glue_dict.to_glueformula_list(depgraph)

    def to_depgraph(self, rel=None):
        """
        Build a ``DependencyGraph`` from this f-structure.

        :param rel: unused; kept so that existing callers keep working
        :return: the populated dependency graph
        :rtype: DependencyGraph
        """
        from nltk.parse.dependencygraph import DependencyGraph

        depgraph = DependencyGraph()
        nodes = depgraph.nodes

        self._to_depgraph(nodes, 0, 'ROOT')

        # Add all the dependencies for all the nodes.  Nothing below adds
        # nodes or changes a 'rel', so the candidates are collected once.
        dependents = [n for n in nodes.values() if n['rel'] != 'TOP']
        for address, node in nodes.items():
            for dependent in dependents:
                if dependent['head'] == address:
                    deps = node['deps'].setdefault(dependent['rel'], [])
                    deps.append(dependent['address'])

        # NOTE(review): assumes a fresh DependencyGraph holds exactly one
        # node (address 0), so this structure was stored at address 1 --
        # confirm against DependencyGraph.
        depgraph.root = nodes[1]

        return depgraph

    @staticmethod
    def _in_stable_order(values):
        """
        Return 'values' sorted when its members can be compared, otherwise
        as a list in their existing order.

        Nested f-structures are dictionaries, which Python 3 cannot order,
        so sorting a feature that holds several of them raises TypeError.
        """
        try:
            return sorted(values)
        except TypeError:
            return list(values)

    def _to_depgraph(self, nodes, head, rel):
        """
        Add a node for this structure, then recursively add a node for every
        value of every feature.

        :param nodes: mapping from address to node dictionary; looking up a
            missing address must create the node (``nodes[index].update``
            is called on addresses that do not exist yet)
        :param head: address of the node that governs this structure
        :type head: int
        :param rel: relation between this structure and its head
        :type rel: str
        :raise Exception: if a feature value is not an FStructure, a tuple
            or a list
        """
        # The next free address is the current number of nodes.
        index = len(nodes)

        nodes[index].update(
            {
                'address': index,
                'word': self.pred[0],
                'tag': self.pred[1],
                'head': head,
                'rel': rel,
            }
        )

        for feature in sorted(self):
            for item in self._in_stable_order(self[feature]):
                if isinstance(item, FStructure):
                    item._to_depgraph(nodes, index, feature)
                elif isinstance(item, tuple):
                    new_index = len(nodes)
                    nodes[new_index].update(
                        {
                            'address': new_index,
                            'word': item[0],
                            'tag': item[1],
                            'head': index,
                            'rel': feature,
                        }
                    )
                elif isinstance(item, list):
                    for n in item:
                        n._to_depgraph(nodes, index, feature)
                else:
                    raise Exception(
                        'feature %s is not an FStruct, a list, or a tuple' % feature
                    )

    @staticmethod
    def read_depgraph(depgraph):
        """
        Build an f-structure from a dependency graph, starting at its root.

        :param depgraph: the graph to read; must provide ``root`` and ``nodes``
        :return: the f-structure for the root node
        :rtype: FStructure
        """
        return FStructure._read_depgraph(depgraph.root, depgraph)

    @staticmethod
    def _read_depgraph(node, depgraph, label_counter=None, parent=None):
        """
        Build the value for a single dependency graph node.

        :param node: the node dictionary to convert
        :param depgraph: the graph that 'node' belongs to
        :param label_counter: counter shared across the recursion and used
            to number the labels; a new one is created when omitted
        :param parent: the f-structure that will contain the result
        :return: a ``(word, tag)`` tuple for 'spec' and 'punct' nodes,
            otherwise an ``FStructure``
        """
        if label_counter is None:
            label_counter = Counter()

        if node['rel'].lower() in ['spec', 'punct']:
            # the value of a 'spec' entry is a word, not an FStructure
            return (node['word'], node['tag'])

        fstruct = FStructure()
        fstruct.pred = None
        fstruct.label = FStructure._make_label(label_counter.get())
        fstruct.parent = parent

        word, tag = node['word'], node['tag']
        if tag[:2] == 'VB':
            # A 'VBD' tag is recorded as a past-tense feature, and the
            # predicate keeps only the bare 'VB' tag.
            if tag[2:3] == 'D':
                fstruct.safeappend('tense', ('PAST', 'tense'))
            fstruct.pred = (word, tag[:2])

        if not fstruct.pred:
            fstruct.pred = (word, tag)

        children = [
            depgraph.nodes[idx] for idx in chain.from_iterable(node['deps'].values())
        ]
        for child in children:
            fstruct.safeappend(
                child['rel'],
                FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
            )

        return fstruct

    @staticmethod
    def _make_label(value):
        """
        Pick an alphabetic character as identifier for an entity in the model.

        Labels run from 'f' to 'z' and then from 'a' to 'e'.  Once all 26
        letters are used the sequence starts again with a numeric suffix
        ('f1', 'g1', ...), so every positive value has a distinct label.

        :param value: where to index into the list of characters; the first
            label corresponds to 1
        :type value: int
        :return: the label for 'value'
        :rtype: str
        """
        letters = 'fghijklmnopqrstuvwxyzabcde'
        # Derive both the letter and the suffix from the same zero-based
        # offset so that they cannot drift apart.
        offset = int(value) - 1
        letter = letters[offset % len(letters)]
        num = offset // len(letters)
        if num > 0:
            return letter + str(num)
        else:
            return letter

    def __repr__(self):
        """Return the pretty-printed form with the newlines removed."""
        return self.pretty_format().replace('\n', '')

    def __str__(self):
        """Return the pretty-printed form of this structure."""
        return self.pretty_format()

    def pretty_format(self, indent=3):
        """
        Render this structure as a bracketed, multi-line string.

        :param indent: number of spaces placed before each feature line
        :type indent: int
        :return: the formatted structure; the label and the predicate are
            left out when the corresponding attribute is missing
        :rtype: str
        :raise Exception: if a feature value is not an FStructure, a tuple
            or a list
        """
        # 'label' and 'pred' only exist on structures built by
        # _read_depgraph; a missing attribute raises AttributeError, so
        # look them up defensively instead.
        label = getattr(self, 'label', None)
        pred = getattr(self, 'pred', None)

        pieces = ['[' if label is None else '%s:[' % label]
        if pred:
            pieces.append('pred \'%s\'' % (pred[0]))
        label_width = 0 if label is None else len(label)
        padding = ' ' * (indent)

        for feature in sorted(self):
            for item in self[feature]:
                if isinstance(item, FStructure):
                    next_indent = indent + len(feature) + 3 + label_width
                    pieces.append(
                        '\n%s%s %s'
                        % (padding, feature, item.pretty_format(next_indent))
                    )
                elif isinstance(item, tuple):
                    pieces.append('\n%s%s \'%s\'' % (padding, feature, item[0]))
                elif isinstance(item, list):
                    separator = '\n%s' % (' ' * (indent + len(feature) + 2))
                    pieces.append(
                        '\n%s%s {%s}' % (padding, feature, separator.join(item))
                    )
                else:  # ERROR
                    raise Exception(
                        'feature %s is not an FStruct, a list, or a tuple' % feature
                    )

        pieces.append(']')
        return ''.join(pieces)
|
||
|
|
||
|
|
||
|
def demo_read_depgraph():
    """
    Parse four small dependency samples and print the f-structure that
    ``FStructure.read_depgraph`` produces for each of them.
    """
    from nltk.parse.dependencygraph import DependencyGraph

    # One sample per graph; each line is "word tag head relation".
    treebank_samples = (
        """\
Esso NNP 2 SUB
said VBD 0 ROOT
the DT 5 NMOD
Whiting NNP 5 NMOD
field NN 6 SUB
started VBD 2 VMOD
production NN 6 OBJ
Tuesday NNP 6 VMOD
""",
        """\
John NNP 2 SUB
sees VBP 0 ROOT
Mary NNP 2 OBJ
""",
        """\
a DT 2 SPEC
man NN 3 SUBJ
walks VB 0 ROOT
""",
        """\
every DT 2 SPEC
girl NN 3 SUBJ
chases VB 0 ROOT
a DT 5 SPEC
dog NN 3 OBJ
""",
    )

    # Every graph is built before anything is printed.
    parsed_graphs = [DependencyGraph(sample) for sample in treebank_samples]
    for graph in parsed_graphs:
        print(FStructure.read_depgraph(graph))
|
||
|
|
||
|
|
||
|
# Run the demonstration only when this module is executed as a script.
if __name__ == "__main__":
    demo_read_depgraph()
|