You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
47 lines
1.7 KiB
Python
47 lines
1.7 KiB
Python
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
from builtins import str, bytes, dict, int
|
|
|
|
import os
|
|
import sys
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
from pattern.search import search
|
|
from pattern.en import parsetree
|
|
|
|
# Constraints ending in "?" are optional, matching one or no word.
|
|
# Pattern.search() uses a "greedy" approach:
|
|
# it will attempt to include as many optional constraints as possible.
|
|
|
|
# The following pattern scans for words whose part-of-speech tag is NN (i.e. nouns).
|
|
# A preceding adjective, adverb or determiner is picked up as well.
|
|
# Sample phrases to scan; the trailing comments give part-of-speech tags.
phrases = (
    "the cat",  # DT NN
    "the very black cat",  # DT RB JJ NN
    "tasty cat food",  # JJ NN NN
    "the funny black cat",  # JJ NN
    "very funny",  # RB JJ => no match, since there is no noun.
    "my cat is black and your cat is white",  # NN + NN
)

for phrase in phrases:
    tree = parsetree(phrase)
    matches = search("DT? RB? JJ? NN+", tree)

    # Show the parsed sentence followed by every match that was found.
    print("")
    print(tree)
    print(matches)

    if not matches:
        continue

    # Only the first match is broken down word by word, showing which
    # constraint of the pattern each word satisfied.
    first = matches[0]
    for word in first.words:
        print("%s matches %s" % (word, first.constraint(word)))
|
|
|
|
# Before Pattern 2.4, "( )" was used instead of "?".
|
|
# For example: "(JJ)" instead of "JJ?".
|
|
# The syntax was changed to resemble regular expressions, which use "?".
|
|
# The old syntax "(JJ)" still works in Pattern 2.4, but it may change later.
|
|
|
|
# Note: the above pattern could also be written as "DT|RB|JJ?+ NN+"
|
|
# to include multiple adverbs/adjectives.
|
|
# By combining "*", "?" and "+" patterns can become quite complex.
|
|
# Optional constraints are useful for very specific patterns, but slow.
|
|
# Also, depending on which parser you use (e.g. MBSP), words can be tagged differently
|
|
# and may not match in the way you expect.
|
|
# Consider using a simple, robust "NP" search pattern.
|