You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

66 lines
1.6 KiB
Python

5 years ago
# Natural Language Toolkit (NLTK) Help
#
# Copyright (C) 2001-2019 NLTK Project
# Authors: Steven Bird <stevenbird1@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
"""
Provide structured access to documentation.
"""
from __future__ import print_function
import re
from textwrap import wrap
from nltk.data import load
def brown_tagset(tagpattern=None):
    """Print help entries from the "brown_tagset" tagset.

    :param tagpattern: optional selector forwarded to ``_format_tagset``.
        A falsy value prints every tag; a string that is itself a tag
        prints only that entry; any other string is compiled as a regular
        expression and matched against the start of each tag.
    """
    _format_tagset("brown_tagset", tagpattern=tagpattern)
def claws5_tagset(tagpattern=None):
    """Print help entries from the "claws5_tagset" tagset.

    :param tagpattern: optional selector forwarded to ``_format_tagset``.
        A falsy value prints every tag; a string that is itself a tag
        prints only that entry; any other string is compiled as a regular
        expression and matched against the start of each tag.
    """
    _format_tagset("claws5_tagset", tagpattern=tagpattern)
def upenn_tagset(tagpattern=None):
    """Print help entries from the "upenn_tagset" tagset.

    :param tagpattern: optional selector forwarded to ``_format_tagset``.
        A falsy value prints every tag; a string that is itself a tag
        prints only that entry; any other string is compiled as a regular
        expression and matched against the start of each tag.
    """
    _format_tagset("upenn_tagset", tagpattern=tagpattern)
#####################################################################
# UTILITIES
#####################################################################
def _print_entries(tags, tagdict):
    """Print one help entry per tag, in the order given by ``tags``.

    :param tags: iterable of keys to look up in ``tagdict``.
    :param tagdict: mapping from tag to an indexable entry; element 0 is
        printed on the heading line after ``"<tag>: "`` and element 1 is
        wrapped and printed on the following lines.
    :raises KeyError: if a tag is not present in ``tagdict`` (plain
        ``tagdict[tag]`` lookup, no fallback).
    """
    for name in tags:
        info = tagdict[name]
        heading = name + ": " + info[0]
        # Wrap the second element to 75 columns; every wrapped line gets
        # the same one-space prefix.
        wrapped = wrap(
            info[1], width=75, initial_indent=' ', subsequent_indent=' '
        )
        lines = [heading]
        lines.extend(wrapped)
        print("\n".join(lines))
def _format_tagset(tagset, tagpattern=None):
    """Load a tagset help resource and print the entries selected by a pattern.

    The resource is loaded from ``"help/tagsets/<tagset>.pickle"`` via
    ``nltk.data.load`` and is used here as a mapping keyed by tag.

    :param tagset: resource base name, e.g. ``"brown_tagset"``.
    :param tagpattern: selector for which entries to print:
        * falsy (``None`` or ``""``): every tag, in sorted order;
        * a string that is a key of the mapping: just that entry;
        * anything else: compiled as a regular expression and applied with
          ``match`` (anchored at the start of the tag only) to each tag in
          sorted order. If nothing matches, a notice is printed instead.
    """
    tagdict = load("help/tagsets/" + tagset + ".pickle")

    # No selector: dump the whole tagset.
    if not tagpattern:
        _print_entries(sorted(tagdict), tagdict)
        return

    # A literal tag name wins over regex interpretation, so a pattern such
    # as 'NN' shows only 'NN' when that exact tag exists.
    if tagpattern in tagdict:
        _print_entries([tagpattern], tagdict)
        return

    matcher = re.compile(tagpattern)
    matching = list(filter(matcher.match, sorted(tagdict)))
    if not matching:
        print("No matching tags found.")
        return
    _print_entries(matching, tagdict)
if __name__ == '__main__':
    # Demo run: each pair is (tagset printer, pattern), executed in order.
    demo_calls = (
        (brown_tagset, r'NN.*'),         # regex: tags starting with "NN"
        (upenn_tagset, r'.*\$'),         # regex: tags containing a literal "$"
        (claws5_tagset, 'UNDEFINED'),    # presumably matches nothing -> notice
        (brown_tagset, r'NN'),           # exact tag if present, else regex
    )
    for show_tagset, pattern in demo_calls:
        show_tagset(pattern)