You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
53 lines
1.4 KiB
Python
53 lines
1.4 KiB
Python
5 years ago
|
# Natural Language Toolkit: WordNet stemmer interface
|
||
|
#
|
||
|
# Copyright (C) 2001-2019 NLTK Project
|
||
|
# Author: Steven Bird <stevenbird1@gmail.com>
|
||
|
# Edward Loper <edloper@gmail.com>
|
||
|
# URL: <http://nltk.org/>
|
||
|
# For license information, see LICENSE.TXT
|
||
|
from __future__ import unicode_literals
|
||
|
|
||
|
from nltk.corpus.reader.wordnet import NOUN
|
||
|
from nltk.corpus import wordnet
|
||
|
from nltk.compat import python_2_unicode_compatible
|
||
|
|
||
|
|
||
|
@python_2_unicode_compatible
|
||
|
class WordNetLemmatizer(object):
|
||
|
"""
|
||
|
WordNet Lemmatizer
|
||
|
|
||
|
Lemmatize using WordNet's built-in morphy function.
|
||
|
Returns the input word unchanged if it cannot be found in WordNet.
|
||
|
|
||
|
>>> from nltk.stem import WordNetLemmatizer
|
||
|
>>> wnl = WordNetLemmatizer()
|
||
|
>>> print(wnl.lemmatize('dogs'))
|
||
|
dog
|
||
|
>>> print(wnl.lemmatize('churches'))
|
||
|
church
|
||
|
>>> print(wnl.lemmatize('aardwolves'))
|
||
|
aardwolf
|
||
|
>>> print(wnl.lemmatize('abaci'))
|
||
|
abacus
|
||
|
>>> print(wnl.lemmatize('hardrock'))
|
||
|
hardrock
|
||
|
"""
|
||
|
|
||
|
def __init__(self):
|
||
|
pass
|
||
|
|
||
|
def lemmatize(self, word, pos=NOUN):
|
||
|
lemmas = wordnet._morphy(word, pos)
|
||
|
return min(lemmas, key=len) if lemmas else word
|
||
|
|
||
|
def __repr__(self):
|
||
|
return '<WordNetLemmatizer>'
|
||
|
|
||
|
|
||
|
# unload wordnet
|
||
|
def teardown_module(module=None):
|
||
|
from nltk.corpus import wordnet
|
||
|
|
||
|
wordnet._unload()
|