You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int

import os
import sys

# Put the directory two levels above this file at the front of the module
# search path (presumably so the example runs against the in-tree `pattern`
# package rather than an installed copy -- confirm against the repo layout).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.search import match
from pattern.en import parsetree

# This example demonstrates how to create match groups.
# A match group is a number of consecutive constraints,
# for which matching words can easily be retrieved from a Match object.

# Suppose we are looking for adjectives preceding nouns.
# A simple pattern is: "JJ?+ NN",
# which matches nouns preceded by any number of adjectives.
# Since the number of adjectives can be 0, 1 or 23 it is not so easy
# to fetch the adjectives from a Match. This can be achieved with a group:

s = "The big black cat"
t = parsetree(s)
print(match("{JJ?+} NN", t).group(1))
print("")

# Note the { } wrapper, indicating a group.
# The group can be retrieved from the match as a list of words.

# Suppose we are looking for prepositional noun phrases,
# e.g., on the mat, with a fork, under the hood, etc...
# The preposition is always one word (on, with, under),
# but the actual noun phrase can have many words (a shiny silver fork),
# so it is a hassle to retrieve it from the match.

# Normally, we would do it like this:

s = "The big black cat sat on the mat."
t = parsetree(s)
m = match("NP VP PP NP", t)
for w in m:
    # Position, within "NP VP PP NP", of the constraint this word matched:
    # 2 is the PP and 3 is the NP that follows it.
    i = m.constraint(w).index
    if i == 2:
        print("This is the PP: %s" % w)
    elif i == 3:
        print("This is the NP: %s" % w)

# In other words, iterate over each word in the match,
# checking which constraint it matched and filtering out what we need.

# It is easier with a group:

# Each { } pair is numbered from 1 in order of appearance,
# so {PP} is group 1 and {NP} is group 2.
m = match("NP VP {PP} {NP}", t)
print("")
print("This is the PP: %s" % m.group(1))
print("This is the NP: %s" % m.group(2))
print("")

# Match.group(0) refers to the words matched by the full search pattern:
print(m.group(0))