You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
862 lines
29 KiB
Python
862 lines
29 KiB
Python
5 years ago
|
# Natural Language Toolkit: CFG visualization
|
||
|
#
|
||
|
# Copyright (C) 2001-2019 NLTK Project
|
||
|
# Author: Edward Loper <edloper@gmail.com>
|
||
|
# URL: <http://nltk.org/>
|
||
|
# For license information, see LICENSE.TXT
|
||
|
|
||
|
"""
|
||
|
Visualization tools for CFGs.
|
||
|
"""
|
||
|
|
||
|
# Idea for a nice demo:
|
||
|
# - 3 panes: grammar, treelet, working area
|
||
|
# - grammar is a list of productions
|
||
|
# - when you select a production, the treelet that it licenses appears
|
||
|
# in the treelet area
|
||
|
# - the working area has the text on the bottom, and S at top. When
|
||
|
# you select a production, it shows (ghosted) the locations where
|
||
|
# that production's treelet could be attached to either the text
|
||
|
# or the tree rooted at S.
|
||
|
# - the user can drag the treelet onto one of those (or click on them?)
|
||
|
# - the user can delete pieces of the tree from the working area
|
||
|
# (right click?)
|
||
|
# - connecting top to bottom? drag one NP onto another?
|
||
|
#
|
||
|
# +-------------------------------------------------------------+
|
||
|
# | S -> NP VP | S |
|
||
|
# |[NP -> Det N ]| / \ |
|
||
|
# | ... | NP VP |
|
||
|
# | N -> 'dog' | |
|
||
|
# | N -> 'cat' | |
|
||
|
# | ... | |
|
||
|
# +--------------+ |
|
||
|
# | NP | Det N |
|
||
|
# | / \ | | | |
|
||
|
# | Det N | the cat saw the dog |
|
||
|
# | | |
|
||
|
# +--------------+----------------------------------------------+
|
||
|
#
|
||
|
# Operations:
|
||
|
# - connect a new treelet -- drag or click shadow
|
||
|
# - delete a treelet -- right click
|
||
|
# - if only connected to top, delete everything below
|
||
|
# - if only connected to bottom, delete everything above
|
||
|
# - connect top & bottom -- drag a leaf to a root or a root to a leaf
|
||
|
# - disconnect top & bottom -- right click
|
||
|
# - if connected to top & bottom, then disconnect
|
||
|
|
||
|
import re
|
||
|
|
||
|
from six import string_types
|
||
|
from six.moves.tkinter import (
|
||
|
Button,
|
||
|
Canvas,
|
||
|
Entry,
|
||
|
Frame,
|
||
|
IntVar,
|
||
|
Label,
|
||
|
Scrollbar,
|
||
|
Text,
|
||
|
Tk,
|
||
|
Toplevel,
|
||
|
)
|
||
|
|
||
|
from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
|
||
|
from nltk.tree import Tree
|
||
|
from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
|
||
|
from nltk.draw.util import (
|
||
|
CanvasFrame,
|
||
|
ColorizedList,
|
||
|
ShowText,
|
||
|
SymbolWidget,
|
||
|
TextWidget,
|
||
|
)
|
||
|
|
||
|
######################################################################
|
||
|
# Production List
|
||
|
######################################################################
|
||
|
|
||
|
|
||
|
class ProductionList(ColorizedList):
    """
    A ``ColorizedList`` whose items are rendered as grammar productions:
    the left-hand side, an arrow symbol, and the right-hand side
    elements, each piece carrying its own color tag.
    """

    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    def _init_colortags(self, textwidget, options):
        """
        Register the three text tags (``terminal``, ``arrow`` and
        ``nonterminal``) that ``_item_repr`` attaches to the pieces
        of a production.  ``options`` is accepted but not consulted.
        """
        tag_styles = (
            ('terminal', {'foreground': '#006000'}),
            ('arrow', {'font': 'symbol', 'underline': '0'}),
            (
                'nonterminal',
                {'foreground': 'blue', 'font': ('helvetica', -12, 'bold')},
            ),
        )
        for tag_name, style in tag_styles:
            textwidget.tag_config(tag_name, **style)

    def _item_repr(self, item):
        """
        Return a list of ``(text, tag)`` pairs describing how ``item``
        should be displayed.  ``item`` must provide ``lhs()`` and
        ``rhs()``; nonterminals on the right-hand side are shown by
        symbol, everything else by its ``repr``.
        """

        def render(symbol):
            if isinstance(symbol, Nonterminal):
                return (' %s' % symbol.symbol(), 'nonterminal')
            return (' %r' % symbol, 'terminal')

        head = [('%s\t' % item.lhs(), 'nonterminal'), (self.ARROW, 'arrow')]
        return head + [render(symbol) for symbol in item.rhs()]
|
||
|
|
||
|
|
||
|
######################################################################
|
||
|
# CFG Editor
|
||
|
######################################################################
|
||
|
|
||
|
# Help text shown by the CFG editor's "Help" button.  The editor strips
# leading and trailing whitespace before displaying it.
_CFGEditor_HELP = """

The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
is specified in the main text editing box.

Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.

Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consist of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.

Terminals must be surrounded by single quotes (') or double
quotes(\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.

To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.

Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.

Use the buttons at the bottom of the window when you are done editing
the CFG:
  - Ok: apply the new CFG, and exit the editor.
  - Apply: apply the new CFG, and do not exit the editor.
  - Reset: revert to the original CFG, and do not exit the editor.
  - Cancel: revert to the original CFG, and exit the editor.

"""
|
||
|
|
||
|
|
||
|
class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    ``CFGEditor`` imposes the following restrictions:

    - All nonterminals must be strings consisting of word
      characters.
    - All terminals must be strings consisting of word characters
      and space characters.

    :param parent: The Tk widget that owns the editor's top-level window.
    :param cfg: The grammar to edit.  If None, an empty grammar whose
        start symbol is ``S`` is used.
    :param set_cfg_callback: If not None, a callable that is invoked
        with a ``CFG`` whenever the grammar is applied or reset.
    """

    # Regular expressions used by _analyze_line.  Precompile them, so
    # we can process the text faster.  Every pattern fragment that
    # contains a regex escape is a raw string, so that Python does not
    # treat ``\s`` or ``\w`` as a (deprecated) string escape.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
    _ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
    _PRODUCTION_RE = re.compile(
        r"(^\s*\w+\s*)"  # LHS
        + "(->|(" + ARROW + r"))\s*"  # arrow
        + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$"  # RHS
    )
    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
    _BOLD = ('helvetica', -12, 'bold')

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        self._parent = parent
        if cfg is not None:
            self._cfg = cfg
        else:
            self._cfg = CFG(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """Build the top row: labels plus the start-symbol entry field."""
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """Build the bottom row of Ok/Apply/Reset/Cancel/Help buttons."""
        frame = self._buttonframe = Frame(self._top)
        left_buttons = (
            ('Ok', self._ok),
            ('Apply', self._apply),
            ('Reset', self._reset),
            ('Cancel', self._cancel),
        )
        for label, command in left_buttons:
            Button(
                frame, text=label, command=command, underline=0, takefocus=0
            ).pack(side='left')
        Button(
            frame, text='Help', command=self._help, underline=0, takefocus=0
        ).pack(side='right')

    def _init_bindings(self):
        """Set the window title and the keyboard shortcuts."""
        self._top.title('CFG Editor')
        # NOTE(review): <Control-x> and <Control-c> are intentionally
        # left unbound here (they were commented out in the original
        # code) -- presumably so they stay available for cut/copy.
        shortcuts = (
            (
                self._cancel,
                (
                    '<Control-q>',
                    '<Alt-q>',
                    '<Control-d>',
                    '<Alt-x>',
                    '<Escape>',
                    '<Alt-c>',
                ),
            ),
            (self._ok, ('<Control-o>', '<Alt-o>')),
            (self._apply, ('<Control-a>', '<Alt-a>')),
            (self._reset, ('<Control-r>', '<Alt-r>')),
            (self._help, ('<Control-h>', '<Alt-h>', '<F1>')),
        )
        for handler, sequences in shortcuts:
            for sequence in sequences:
                self._top.bind(sequence, handler)

    def _init_prodframe(self):
        """
        Build the production editing area (text widget and scrollbar),
        install the colorization tags and event bindings, and fill the
        text widget with the productions of the current grammar.
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(
            self._prodframe, background='#e0e0e0', exportselection=1
        )
        self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical')
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind('>', self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Tab cycles focus. (why doesn't this work??)
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()

        self._textwidget.bind('<Tab>', cycle)

        # Add the productions to the text widget, and colorize them.
        for line in self._production_lines():
            self._textwidget.insert('end', line)
        self._analyze()

    def _production_lines(self):
        """
        Return the text lines (each ending in a newline) that display
        the current grammar.  Adjacent productions that share a
        left-hand side are merged into one ``LHS -> a | b`` line,
        except that a production with an empty right-hand side is
        never merged with its neighbor.
        """
        groups = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]

        # Walk backwards so that deleting entry i never disturbs the
        # entries that are still to be visited.
        for i in range(len(groups) - 1, 0, -1):
            lhs, rhss = groups[i]
            prev_lhs, prev_rhss = groups[i - 1]
            if not (lhs == prev_lhs):
                continue
            if () in rhss or () in prev_rhss:
                continue
            prev_rhss.extend(rhss)
            del groups[i]

        lines = []
        for lhs, rhss in groups:
            alternatives = [
                ''.join(
                    ' %s' % elt if isinstance(elt, Nonterminal) else ' %r' % elt
                    for elt in rhs
                )
                for rhs in rhss
            ]
            lines.append('%s ->' % lhs + ' |'.join(alternatives) + '\n')
        return lines

    def _clear_tags(self, linenum):
        """
        Remove all tags (except ``arrow`` and ``sel``) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0' % linenum
        end = '%d.end' % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any ``'->'`` text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # Pass 1: swap each textual "->" for a tab followed by the
        # arrow symbol.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '':
                break
            self._textwidget.delete(arrow, arrow + '+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        # Pass 2: make sure every arrow symbol carries the 'arrow' tag.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search(self.ARROW, arrow + '+1char', 'end+1char')
            if arrow == '':
                break
            self._textwidget.tag_add('arrow', arrow, arrow + '+1char')

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        token = match.group()

        # What type of token is it?
        if token[0] in "'\"":
            tag = 'terminal'
        elif token in ('->', self.ARROW):
            tag = 'arrow'
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = 'nonterminal_' + token
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        """
        Configure the text tag ``tag`` for a nonterminal and, if
        highlighting is enabled, bind mouse enter/leave handlers that
        toggle a background color on every occurrence of the tag.
        """
        self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return

        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')

        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')

        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get('%d.0' % linenum, '%d.end' % linenum)

        if CFGEditor._PRODUCTION_RE.match(line):
            # It's a valid production; colorize each of its tokens.
            for match in CFGEditor._TOKEN_RE.finditer(line):
                self._analyze_token(match, linenum)
        elif line.strip() != '':
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace ``->`` with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines + 1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.

        :return: The productions read from every non-blank line, in
            buffer order.
        """
        # Get the text and normalize it: arrow symbols become "->" and
        # tabs become spaces.  These are literal substitutions, so
        # plain string replacement is used rather than a regexp.
        text = self._textwidget.get('1.0', 'end')
        text = text.replace(self.ARROW, '->').replace('\t', ' ')

        # Convert each non-blank line to one or more CFG productions.
        productions = []
        for line in text.split('\n'):
            line = line.strip()
            if line == '':
                continue
            productions += _read_cfg_production(line)
        return productions

    def _destroy(self, *e):
        """Close the editor window; a no-op if it is already closed."""
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """Apply the edited grammar, then close the editor."""
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """
        Build a ``CFG`` from the start-symbol entry and the text
        buffer, and pass it to the callback (if one was given).
        """
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = CFG(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Refill the text buffer from the original grammar (one
        production per line), and pass the original grammar to the
        callback (if one was given).
        """
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """Revert to the original grammar, then close the editor."""
        # Resetting is best-effort: a failure here must not prevent the
        # window from closing.  Only ordinary exceptions are
        # suppressed, so KeyboardInterrupt/SystemExit still propagate.
        try:
            self._reset()
        except Exception:
            pass
        self._destroy()

    def _help(self, *e):
        """Display the editor's help text in a separate window."""
        title = 'Help: CFG Editor'
        text = _CFGEditor_HELP.strip()
        # The default font's not very legible; try using 'fixed' instead,
        # and fall back to the default font if that fails.
        try:
            ShowText(self._parent, title, text, width=75, font='fixed')
        except Exception:
            ShowText(self._parent, title, text, width=75)
|
||
|
|
||
|
|
||
|
######################################################################
|
||
|
# New Demo (built tree based on cfg)
|
||
|
######################################################################
|
||
|
|
||
|
|
||
|
class CFGDemo(object):
    """
    An (unfinished) graphical demo that builds a tree from a grammar.

    The window shows the grammar's productions and a "treelet" canvas
    on the left, and a workspace on the right containing the start
    symbol as root with the words of the text as its leaves.
    Selecting a production draws the treelet it licenses.

    :param grammar: The grammar to display; it must provide
        ``start()`` and ``productions()``.
    :param text: An iterable of words used as the workspace leaves.
    """

    def __init__(self, grammar, text):
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    # //////////////////////////////////////////////////
    # Initialization
    # //////////////////////////////////////////////////

    def _init_bindings(self, top):
        """Bind Control-q to closing the window."""
        top.bind('<Control-q>', self.destroy)

    def _init_menubar(self, parent):
        """Placeholder: the demo has no menu bar yet."""
        pass

    def _init_buttons(self, parent):
        """Placeholder: the demo has no buttons yet."""
        pass

    def _init_grammar(self, parent):
        """Create the production list and hook up its callbacks."""
        # The list items are rendered via lhs()/rhs(), so hand it the
        # grammar's productions rather than the grammar object itself.
        self._prodlist = ProductionList(
            parent, self._grammar.productions(), width=20
        )
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the canvas in which the selected treelet is drawn."""
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas and populate it."""
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    # //////////////////////////////////////////////////
    # Workspace
    # //////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Replace the workspace tree with a fresh one: the grammar's
        start symbol as root, and one leaf per word of the text.
        """
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = [
            TextWidget(c, word, font=leaf_font, draggable=1) for word in self._text
        ]

        # Put it all together into one tree.  The connecting lines are
        # white, the same color as the workspace background.
        self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves:
            leaf.move(0, 100)

    def workspace_markprod(self, production):
        """Placeholder: not implemented."""
        pass

    def _markproduction(self, prod, tree=None):
        """
        Report every position in ``tree`` (default: the workspace
        tree) where a run of consecutive children matches the
        right-hand side of ``prod``.  Each match is currently only
        printed as ``MATCH AT <index>``.

        A nonterminal matches a tree segment whose label text equals
        its symbol; a string matches a text widget with the same text.
        """
        if tree is None:
            tree = self._tree
        rhs = prod.rhs()
        children = tree.subtrees()

        # TODO(review): the original code tested ``tree['color', i]``
        # against 'white' here and then did nothing; a recursive
        # descent was presumably intended but never written.

        # ``+ 1`` so that the window ending at the last child is
        # examined too.
        for i in range(len(children) - len(rhs) + 1):
            for j, node in enumerate(rhs):
                widget = children[i + j]
                if (
                    isinstance(node, Nonterminal)
                    and isinstance(widget, TreeSegmentWidget)
                    and node.symbol() == widget.label().text()
                ):
                    pass  # matching nonterminal
                elif (
                    isinstance(node, string_types)
                    and isinstance(widget, TextWidget)
                    and node == widget.text()
                ):
                    pass  # matching terminal
                else:
                    break
            else:
                # Everything matched!
                print('MATCH AT', i)

    # //////////////////////////////////////////////////
    # Grammar
    # //////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for the production list: highlight ``production``,
        draw the treelet it licenses, centered in the treelet canvas,
        and look for places where it matches the workspace tree.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None:
            self._treelet.destroy()

        # Convert the production to a tree.  ``Tree`` takes a label
        # and a list of children; each nonterminal on the right-hand
        # side becomes a childless subtree, each terminal a leaf.
        children = [
            Tree(elt.symbol(), []) if isinstance(elt, Nonterminal) else elt
            for elt in production.rhs()
        ]
        tree = Tree(production.lhs().symbol(), children)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))
        self._treelet = tree_to_treesegment(
            canvas, tree, node_font=node_font, leaf_font=leaf_font
        )
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        """Close the demo window."""
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        """Enter the Tk main loop of the demo window."""
        self._top.mainloop(*args, **kwargs)
|
||
|
|
||
|
|
||
|
def demo2():
    """Open a ``CFGDemo`` window for a small English grammar."""
    from nltk import Nonterminal, Production, CFG

    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = map(Nonterminal, symbol_names.split())

    # (left-hand side, right-hand side) for every rule, in order.
    rules = [
        # Syntactic Productions
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
        (PP, []),
        (PP, ['up', 'over', NP]),
        # Lexical Productions
        (NP, ['I']),
        (Det, ['the']),
        (Det, ['a']),
        (N, ['man']),
        (V, ['saw']),
        (P, ['in']),
        (P, ['with']),
        (N, ['park']),
        (N, ['dog']),
        (N, ['statue']),
        (Det, ['my']),
    ]
    productions = tuple(Production(lhs, rhs) for lhs, rhs in rules)
    grammar = CFG(S, productions)

    words = 'I saw a man in the park'.split()
    CFGDemo(grammar, words).mainloop()
|
||
|
|
||
|
|
||
|
######################################################################
|
||
|
# Old Demo
|
||
|
######################################################################
|
||
|
|
||
|
|
||
|
def demo():
    """
    Open a ``CFGEditor`` on a small English grammar.  Every grammar
    that the editor applies or resets is printed to standard output.
    """
    grammar = CFG.fromstring(
        """
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """
    )

    def cb(grammar):
        print(grammar)

    top = Tk()
    # Keep a reference to the editor for as long as the main loop runs.
    editor = CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
|
||
|
|
||
|
|
||
|
def demo3():
    """
    Show a ``ProductionList`` for a small English grammar, with two of
    its productions marked.  Pressing ``q`` closes the window.
    """
    from nltk import Production

    (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
        'S, VP, NP, PP, P, N, Name, V, Det'
    )

    # (left-hand side, right-hand side) for every rule, in order.
    rules = [
        # Syntactic Productions
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
        (PP, []),
        (PP, ['up', 'over', NP]),
        # Lexical Productions
        (NP, ['I']),
        (Det, ['the']),
        (Det, ['a']),
        (N, ['man']),
        (V, ['saw']),
        (P, ['in']),
        (P, ['with']),
        (N, ['park']),
        (N, ['dog']),
        (N, ['statue']),
        (Det, ['my']),
    ]
    productions = tuple(Production(lhs, rhs) for lhs, rhs in rules)

    root = Tk()

    def destroy(e, t=root):
        t.destroy()

    root.bind('q', destroy)

    plist = ProductionList(root, productions)
    plist.pack(expand=1, fill='both')
    for event in ('select', 'move'):
        plist.add_callback(event, plist.markonly)
    plist.focus()
    for index in (2, 8):
        plist.mark(productions[index])
|
||
|
|
||
|
|
||
|
# Run the CFG editor demo when this module is executed as a script.
if __name__ == '__main__':
    demo()
|