arranged folders
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 31 KiB |
Before Width: | Height: | Size: 36 KiB |
@ -1 +0,0 @@
|
||||
venv/
|
@ -1,10 +0,0 @@
|
||||
import nltk
|
||||
|
||||
# Read the whole source text; the context manager closes the handle even if
# reading fails (the original left the file open for the life of the process).
with open('faceapp.txt', 'r') as text_file:
    raw = text_file.read()

# Split the raw text into word/punctuation tokens and wrap them in an
# nltk.Text so the concordance view is available.
tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)

# Print every occurrence of 'services' together with its surrounding context.
faceapp.concordance('services')
|
||||
|
@ -1,39 +0,0 @@
|
||||
import sys
|
||||
import codecs
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
|
||||
# NLTK's default English stopwords
|
||||
default_stopwords = set(nltk.corpus.stopwords.words('english'))

# Read extra stopwords from a file (one stopword per line, UTF-8).
# The path constant is now actually used instead of a second hard-coded copy,
# and the handle is closed as soon as the file has been read.
stopwords_file = './stopwords.txt'
with codecs.open(stopwords_file, 'r', 'utf-8') as stopwords_handle:
    custom_stopwords = set(stopwords_handle.read().splitlines())

all_stopwords = default_stopwords | custom_stopwords

# Read and tokenize the source text; the context manager closes the file.
with open('faceapp.txt', 'r') as text_file:
    raw = text_file.read()
tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)
faceapp.concordance('services')

# Remove single-character tokens (mostly punctuation)
tokens = [word for word in tokens if len(word) > 1]

# Remove numbers
tokens = [word for word in tokens if not word.isnumeric()]

# Lowercase all words (default_stopwords are lowercase too)
tokens = [word.lower() for word in tokens]

# Remove stopwords
tokens = [word for word in tokens if word not in all_stopwords]

# Calculate frequency distribution
fdist = nltk.FreqDist(tokens)

# Output the 10 most frequent words, one "word;count" pair per line.
for word, frequency in fdist.most_common(10):
    print(u'{};{}'.format(word, frequency))
|
@ -1,32 +0,0 @@
|
||||
import sys
|
||||
import codecs
|
||||
import nltk
|
||||
import json
|
||||
from nltk.corpus import stopwords
|
||||
from nltk import sent_tokenize, word_tokenize, pos_tag
|
||||
|
||||
|
||||
# Read stop words from a file (one stopword per line, UTF-8).
# The path constant is used for the actual read and the handle is closed.
stopwords_file = './stopwords.txt'
with codecs.open(stopwords_file, 'r', 'utf-8') as stopwords_handle:
    custom_stopwords = set(stopwords_handle.read().splitlines())

# Open the txt file, read, and tokenize.
with open('faceapp.txt', 'r') as text_file:
    raw = text_file.read()
tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)

# Remove single-character tokens (mostly punctuation)
tokens = [word for word in tokens if len(word) > 1]
# Remove numbers
tokens = [word for word in tokens if not word.isnumeric()]
# Lowercase all words (default_stopwords are lowercase too)
tokens = [word.lower() for word in tokens]

# Part-of-speech tag every sentence of the raw text: one list of
# (word, tag) pairs per sentence.
# NOTE(review): tagging runs on `raw`, not on the cleaned `tokens` above, so
# the filtering steps do not influence the output -- confirm this is intended.
# The result is stored under its own name so that the imported `pos_tag`
# function is not overwritten by its own output.
tagged_sentences = [
    pos_tag(word_tokenize(sent)) for sent in sent_tokenize(raw)
]
print(tagged_sentences)

# Persist the tagged sentences; json serialises each (word, tag) tuple as a
# two-element array.
with open('colonial-glossary.json', 'w') as json_file:
    json.dump(tagged_sentences, json_file)
|
@ -1,30 +0,0 @@
|
||||
import nltk
|
||||
|
||||
# Read and tokenize the source text; the context manager closes the file.
with open('faceapp.txt', 'r') as text_file:
    raw = text_file.read()
tokens = nltk.word_tokenize(raw)
faceapp = nltk.Text(tokens)

# My stopwords are common words I don't want to count, like "a", "an", "the".
# Read as UTF-8, one stopword per line.
with open('stopwords.txt', encoding='utf-8') as stopwords_handle:
    stopwords = set(line.strip() for line in stopwords_handle)

# Maps each counted word to its number of occurrences.
wordcount = {}

# Strip punctuation so "book" and "book!" count as the same word.
# A single translation table removes all listed characters in one pass.
punctuation_to_strip = '.,:"!“‘*()'
strip_table = str.maketrans('', '', punctuation_to_strip)

for word in raw.lower().split():
    word = word.translate(strip_table)
    # The original loop cleaned each word and then threw it away, leaving
    # `wordcount` empty and `stopwords` unused; tally the cleaned words here.
    # Tokens that were pure punctuation become empty and are skipped.
    if word and word not in stopwords:
        wordcount[word] = wordcount.get(word, 0) + 1

# Print every occurrence of the token 'a' with its surrounding context.
faceapp.concordance('a')
|
||||
|
@ -1,58 +0,0 @@
|
||||
EPISTEMIC = "epistemic"  # Expresses degree of coloniality.


def d(*args):
    """Return a dict that maps every given word to True.

    The tables below use it as a compact literal for a set of words with
    constant-time membership tests. It was referenced but never defined in
    this file, which made the module fail on import with a NameError.
    """
    return dict.fromkeys(args, True)


# gradation of intensity words
# 100.00 = absolute level of coloniality
#  90.00 = extreme level of coloniality
#  80.00 = heavy level of coloniality
#  70.00 = high level of coloniality
#  60.00 = significant level of coloniality
#  50.00 = (no label assigned)
#  40.00 = relative level of coloniality
#  30.00 = moderate level of coloniality
#  20.00 = reasonable level of coloniality
#  10.00 = fair level of coloniality
#   0.00 = neutral level of coloniality

# lists of part of speech
# MD  = would, could...
# RB  = adverb 'very', 'slightly'...
# VB  = verb
# JJ  = adjective 'big'...
# NN  = noun
# CC  = coordinating conjunction 'and', 'or'...
# PRP = personal pronoun 'I', 'he', 'she'...

# Modal auxiliaries keyed by intensity score.
# would => could => can => should => shall => will => must
# NOTE(review): the 100.00 and 90.00 entries are identical, and
# "can"/"ca"/"may" appear at both 80.00 and 20.00 -- confirm which level each
# word is meant to belong to.
epistemic_MD = {
    100.00: d("have", "has", "must", "need"),
     90.00: d("have", "has", "must", "need"),
     80.00: d("can", "ca", "may"),
     70.00: d(),
     60.00: d(),
     50.00: d("shall", "sha"),
     40.00: d("will", "'ll", "wo"),
     30.00: d(),
     20.00: d("can", "ca", "may"),
     10.00: d("could", "dare", "might"),
      0.00: d("would"),
}

# Verbs from the FaceApp ToS keyed by intensity score.
# NOTE(review): "use" appears at both 100.00 and 90.00 -- confirm the intended
# level.
epistemic_VB = {
    100.00: d("must", "agree", "use"),
    # "acknowledge" was misspelled "acknowlegde" and could never match a token.
     90.00: d("use", "bound", "access", "allow", "acknowledge", "reproduce"),
     80.00: d("choose", "claim", "permit", "collect"),
     70.00: d("change"),
     60.00: d("create"),
     50.00: d(),
     40.00: d("maintain"),
     30.00: d("support"),
     20.00: d("identify"),
     10.00: d("may"),
      0.00: d(),
}
|
||||
|
||||
|
||||
|
@ -1,75 +0,0 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.pyc
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
.coveralls.yml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
*.dev*
|
||||
*.nja
|
||||
|
||||
build
|
||||
dist
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Flymake
|
||||
*_flymake.py
|
||||
|
||||
# Pattern-specific ignore patterns
|
||||
pattern/web/cache/tmp/
|
||||
web/cache/tmp/
|
||||
pattern_unittest_db
|
||||
test/pattern_unittest_db
|
||||
|
||||
.DS_Store
|
@ -1,249 +0,0 @@
|
||||
[MASTER]
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Profiled execution.
|
||||
profile=no
|
||||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=CVS, feed, json, pdf, soup, pywordnet, svm
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifiers separated by commas (,) or put this option
|
||||
# multiple times.
|
||||
#enable=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifier separated by comma (,) or put this option
|
||||
# multiple time (only on the command line, not in the configuration file where
|
||||
# it should appear only once).
|
||||
disable=C0103,W0142,E1103
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, msvs
|
||||
# (visual studio) and html
|
||||
output-format=text
|
||||
|
||||
# Include message's id in output
|
||||
include-ids=yes
|
||||
|
||||
# Put messages in a separate file for each module / package specified on the
|
||||
# command line instead of printing them on stdout. Reports (if any) will be
|
||||
# written in a file name "pylint_global.[txt|html]".
|
||||
files-output=no
|
||||
|
||||
# Tells whether to display a full report or only the messages
|
||||
reports=yes
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables errors warning, statement which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Add a comment according to your evaluation note. This is used by the global
|
||||
# evaluation report (RP0004).
|
||||
comment=no
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Required attributes for module, separated by a comma
|
||||
required-attributes=
|
||||
|
||||
# List of builtins function names that should not be used, separated by a comma
|
||||
bad-functions=map,filter,apply,input
|
||||
|
||||
# Regular expression which should only match correct module names
|
||||
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Regular expression which should only match correct module level names
|
||||
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Regular expression which should only match correct class names
|
||||
class-rgx=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Regular expression which should only match correct function names
|
||||
function-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct method names
|
||||
method-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct instance attribute names
|
||||
attr-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct argument names
|
||||
argument-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct variable names
|
||||
variable-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct list comprehension /
|
||||
# generator expression variable names
|
||||
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma
|
||||
good-names=i,j,k,ex,Run,_
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma
|
||||
bad-names=foo,bar,baz,toto,tutu,tata
|
||||
|
||||
# Regular expression which should only match functions or classes name which do
|
||||
# not require a docstring
|
||||
no-docstring-rgx=__.*__
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=100
|
||||
|
||||
# Maximum number of lines in a module
|
||||
max-module-lines=1000
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take into consideration, separated by a comma.
|
||||
notes=FIXME,XXX,TODO
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Minimum number of lines of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# List of classes names for which member attributes should not be checked
|
||||
# (useful for classes with attributes dynamically set).
|
||||
ignored-classes=SQLObject
|
||||
|
||||
# When zope mode is activated, add a predefined set of Zope acquired attributes
|
||||
# to generated-members.
|
||||
zope=no
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E0201 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=REQUEST,acl_users,aq_parent
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# A regular expression matching the beginning of the name of dummy variables
|
||||
# (i.e. not used).
|
||||
dummy-variables-rgx=_|dummy
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid defining new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of interface methods to ignore, separated by a comma. This is used for
|
||||
# instance to not check methods defined in Zope's Interface base class.
|
||||
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,__new__,setUp
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method
|
||||
max-args=5
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore
|
||||
ignored-argument-names=_.*
|
||||
|
||||
# Maximum number of locals for function / method body
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of return / yield for function / method body
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of branches for function / method body
|
||||
max-branchs=12
|
||||
|
||||
# Maximum number of statements in function / method body
|
||||
max-statements=50
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma
|
||||
deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled)
|
||||
import-graph=
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
ext-import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
int-import-graph=
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when being caught. Defaults to
|
||||
# "Exception"
|
||||
overgeneral-exceptions=Exception
|
@ -1,44 +0,0 @@
|
||||
language: python
|
||||
|
||||
dist: precise
|
||||
|
||||
python:
|
||||
- "3.6"
|
||||
|
||||
before_install:
|
||||
- export TZ=Europe/Brussels
|
||||
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
|
||||
- bash miniconda.sh -b -p $HOME/miniconda
|
||||
- export PATH="$HOME/miniconda/bin:$PATH"
|
||||
- conda update --yes conda
|
||||
- conda install --yes numpy scipy
|
||||
- pip install --quiet pytest pytest-cov pytest-xdist chardet
|
||||
|
||||
install:
|
||||
- python setup.py install --quiet
|
||||
- pip freeze
|
||||
# Install and compile libsvm and liblinear
|
||||
- sudo apt-get install -y build-essential
|
||||
- git clone https://github.com/cjlin1/libsvm
|
||||
- cd libsvm; make lib; sudo cp libsvm.so.2 /lib; sudo ln -s /lib/libsvm.so.2 /lib/libsvm.so; cd ..
|
||||
- git clone https://github.com/cjlin1/liblinear
|
||||
- cd liblinear; make lib; sudo cp liblinear.so.3 /lib; sudo ln -s /lib/liblinear.so.3 /lib/liblinear.so; cd ..
|
||||
|
||||
script:
|
||||
- pytest --cov=pattern
|
||||
|
||||
|
||||
after_script:
|
||||
- pip install --quiet coveralls
|
||||
- coveralls
|
||||
|
||||
branches:
|
||||
only:
|
||||
- development
|
||||
|
||||
notifications:
|
||||
email: false
|
||||
|
||||
# You can connect to MySQL/MariaDB using the username "travis" or "root" and a blank password.
|
||||
services:
|
||||
- mysql
|
@ -1,29 +0,0 @@
|
||||
Copyright (c) 2011-2013 University of Antwerp, Belgium
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Pattern nor the names of its
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
Before Width: | Height: | Size: 2.7 KiB |
Before Width: | Height: | Size: 1.3 KiB |
Before Width: | Height: | Size: 280 B |
Before Width: | Height: | Size: 16 KiB |
Before Width: | Height: | Size: 6.2 KiB |
Before Width: | Height: | Size: 187 B |
Before Width: | Height: | Size: 88 KiB |
Before Width: | Height: | Size: 108 KiB |
Before Width: | Height: | Size: 4.9 KiB |
Before Width: | Height: | Size: 5.1 KiB |
Before Width: | Height: | Size: 3.3 KiB |
Before Width: | Height: | Size: 8.5 KiB |
Before Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 42 KiB |
Before Width: | Height: | Size: 36 KiB |
Before Width: | Height: | Size: 3.6 KiB |
Before Width: | Height: | Size: 5.4 KiB |
Before Width: | Height: | Size: 5.8 KiB |
Before Width: | Height: | Size: 6.3 KiB |
Before Width: | Height: | Size: 6.4 KiB |
Before Width: | Height: | Size: 6.2 KiB |
Before Width: | Height: | Size: 7.5 KiB |
Before Width: | Height: | Size: 7.0 KiB |
Before Width: | Height: | Size: 6.5 KiB |
Before Width: | Height: | Size: 8.1 KiB |
Before Width: | Height: | Size: 11 KiB |
Before Width: | Height: | Size: 5.8 KiB |
Before Width: | Height: | Size: 10 KiB |
Before Width: | Height: | Size: 9.1 KiB |
Before Width: | Height: | Size: 12 KiB |
Before Width: | Height: | Size: 14 KiB |
Before Width: | Height: | Size: 5.4 KiB |
Before Width: | Height: | Size: 6.4 KiB |
Before Width: | Height: | Size: 2.8 KiB |
Before Width: | Height: | Size: 3.9 KiB |
Before Width: | Height: | Size: 9.1 KiB |
Before Width: | Height: | Size: 7.4 KiB |
Before Width: | Height: | Size: 8.8 KiB |
Before Width: | Height: | Size: 48 KiB |
Before Width: | Height: | Size: 68 KiB |
Before Width: | Height: | Size: 6.6 KiB |
Before Width: | Height: | Size: 44 KiB |
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 19 KiB |
Before Width: | Height: | Size: 21 KiB |
Before Width: | Height: | Size: 2.1 KiB |
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 429 B |
@ -1,474 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>mbsp-tags</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/mbsp-tags" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/mbsp-tags</a></div>
|
||||
<h1>Penn Treebank II tag set</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1274" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p class="big"><a href="pattern.html">Pattern</a> and <a href="http://www.clips.ua.ac.be/pages/MBSP" target="_self">MBSP</a> assign meaningful tags to words and groups of words in a sentence. Each tag is a short code (such as "<span class="postag">DT</span>" for "determiner").</p>
|
||||
<p>The tag set is based on the Penn Treebank Tagging Guidelines [<a href="ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz" target="_self">pdf</a>].</p>
|
||||
<h3>Part-of-speech tags</h3>
|
||||
<p>Part-of-speech tags are assigned to a single word according to its role in the sentence. Traditional grammar classifies words based on eight parts of speech: the verb (<span class="postag">VB</span>), the noun (<span class="postag">NN</span>), the pronoun (<span class="postag">PR</span>+<span class="postag">DT</span>), the adjective (<span class="postag">JJ</span>), the adverb (<span class="postag">RB</span>), the preposition (<span class="postag">IN</span>), the conjunction (<span class="postag">CC</span>), and the interjection (<span class="postag">UH</span>).</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td class="smallcaps">Example</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">CC </span></td>
|
||||
<td>conjunction, coordinating</td>
|
||||
<td><em>and, or, but</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">CD </span></td>
|
||||
<td>cardinal number</td>
|
||||
<td><em>five, three, 13%</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">DT </span></td>
|
||||
<td>determiner</td>
|
||||
<td><em>the, a, these <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">EX </span></td>
|
||||
<td>existential there</td>
|
||||
<td><em><span style="text-decoration: underline;">there</span> were six boys <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">FW </span></td>
|
||||
<td>foreign word</td>
|
||||
<td><em>mais <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">IN </span></td>
|
||||
<td>conjunction, subordinating or preposition</td>
|
||||
<td><em>of, on, before, unless <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">JJ </span></td>
|
||||
<td>adjective</td>
|
||||
<td><em>nice, easy </em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">JJR </span></td>
|
||||
<td>adjective, comparative</td>
|
||||
<td><em>nicer, easier</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">JJS </span></td>
|
||||
<td>adjective, superlative</td>
|
||||
<td><em>nicest, easiest <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">LS </span></td>
|
||||
<td>list item marker</td>
|
||||
<td><em> </em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">MD </span></td>
|
||||
<td>verb, modal auxiliary</td>
|
||||
<td><em>may, should <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NN </span></td>
|
||||
<td>noun, singular or mass</td>
|
||||
<td><em>tiger, chair, laughter <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NNS </span></td>
|
||||
<td>noun, plural</td>
|
||||
<td><em>tigers, chairs, insects <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NNP </span></td>
|
||||
<td>noun, proper singular</td>
|
||||
<td><em>Germany, God, Alice <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NNPS </span></td>
|
||||
<td>noun, proper plural</td>
|
||||
<td><em>we met two <span style="text-decoration: underline;">Christmases</span> ago <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PDT </span></td>
|
||||
<td>predeterminer</td>
|
||||
<td><em><span style="text-decoration: underline;">both</span> his children <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">POS</span></td>
|
||||
<td>possessive ending</td>
|
||||
<td><em>'s</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PRP </span></td>
|
||||
<td>pronoun, personal</td>
|
||||
<td><em>me, you, it <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PRP$ </span></td>
|
||||
<td>pronoun, possessive</td>
|
||||
<td><em>my, your, our <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RB </span></td>
|
||||
<td>adverb</td>
|
||||
<td><em>extremely, loudly, hard <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RBR </span></td>
|
||||
<td>adverb, comparative</td>
|
||||
<td><em>better <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RBS </span></td>
|
||||
<td>adverb, superlative</td>
|
||||
<td><em>best <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RP </span></td>
|
||||
<td>adverb, particle</td>
|
||||
<td><em>about, off, up <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">SYM </span></td>
|
||||
<td>symbol</td>
|
||||
<td><em>% <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">TO </span></td>
|
||||
<td>infinitival to</td>
|
||||
<td><em>what <span style="text-decoration: underline;">to</span> do? <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">UH </span></td>
|
||||
<td>interjection</td>
|
||||
<td><em>oh, oops, gosh <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VB </span></td>
|
||||
<td>verb, base form</td>
|
||||
<td><em>think <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBZ </span></td>
|
||||
<td>verb, 3rd person singular present</td>
|
||||
<td><em>she <span style="text-decoration: underline;">thinks </span><br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBP </span></td>
|
||||
<td>verb, non-3rd person singular present</td>
|
||||
<td><em>I <span style="text-decoration: underline;">think </span><br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBD </span></td>
|
||||
<td>verb, past tense</td>
|
||||
<td><em>they <span style="text-decoration: underline;">thought </span><br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBN </span></td>
|
||||
<td>verb, past participle</td>
|
||||
<td><em>a <span style="text-decoration: underline;">sunken</span> ship <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBG </span></td>
|
||||
<td>verb, gerund or present participle</td>
|
||||
<td><em><span style="text-decoration: underline;">thinking</span> is fun <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WDT </span></td>
|
||||
<td><em>wh</em>-determiner</td>
|
||||
<td><em>which, whatever, whichever <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WP </span></td>
|
||||
<td><em>wh</em>-pronoun, personal</td>
|
||||
<td><em>what, who, whom <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WP$</span></td>
|
||||
<td><em>wh</em>-pronoun, possessive</td>
|
||||
<td><em>whose, whosever <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WRB</span></td>
|
||||
<td><em>wh</em>-adverb</td>
|
||||
<td><em>where, when <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">. </span></td>
|
||||
<td>punctuation mark, sentence closer</td>
|
||||
<td><em>.;?* <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">, </span></td>
|
||||
<td>punctuation mark, comma</td>
|
||||
<td><em>, <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">: </span></td>
|
||||
<td>punctuation mark, colon</td>
|
||||
<td><em>: <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">( </span></td>
|
||||
<td>contextual separator, left paren</td>
|
||||
<td><em>( <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">) </span></td>
|
||||
<td>contextual separator, right paren</td>
|
||||
<td><em>) <br /></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Chunk tags</h3>
|
||||
<p>Chunk tags are assigned to groups of words that belong together (i.e. phrases). The most common phrases are the noun phrase (<span class="postag">NP</span>, for example <em>the black cat</em>) and the verb phrase (<span class="postag">VP</span>, for example <em>is purring</em>).</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td><span class="smallcaps">Words </span></td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
<td align="right">%</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NP </span></td>
|
||||
<td>noun phrase<span class="postag"> </span></td>
|
||||
<td><span class="postag">DT</span>+<span class="postag">RB</span>+<span class="postag">JJ</span>+<span class="postag">NN</span> + <span class="postag">PR</span></td>
|
||||
<td><em>the strange bird</em></td>
|
||||
<td align="right"> 51</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PP </span></td>
|
||||
<td>prepositional phrase</td>
|
||||
<td><span class="postag">TO</span>+<span class="postag">IN </span></td>
|
||||
<td><em>in between</em></td>
|
||||
<td align="right"> 19</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VP </span></td>
|
||||
<td>verb phrase </td>
|
||||
<td><span class="postag">RB</span>+<span class="postag">MD</span>+<span class="postag">VB </span></td>
|
||||
<td><em>was looking<br /></em></td>
|
||||
<td align="right">9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">ADVP</span></td>
|
||||
<td>adverb phrase</td>
|
||||
<td><span class="postag">RB</span></td>
|
||||
<td><em>also<br /></em></td>
|
||||
<td align="right"> 6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">ADJP</span></td>
|
||||
<td>adjective phrase<span class="postag"> </span></td>
|
||||
<td><span class="postag">CC</span>+<span class="postag">RB</span>+<span class="postag">JJ</span></td>
|
||||
<td><em>warm and cosy</em></td>
|
||||
<td align="right"> 3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">SBAR</span></td>
|
||||
<td>subordinating conjunction </td>
|
||||
<td><span class="postag">IN</span></td>
|
||||
<td><em><span style="text-decoration: underline;">whether</span> or not<br /></em></td>
|
||||
<td align="right">3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PRT </span></td>
|
||||
<td>particle</td>
|
||||
<td><span class="postag">RP</span></td>
|
||||
<td><em><span style="text-decoration: underline;">up</span> the stairs</em></td>
|
||||
<td align="right"> 1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">INTJ</span></td>
|
||||
<td>interjection</td>
|
||||
<td><span class="postag">UH</span></td>
|
||||
<td><em>hello</em><em><br /></em></td>
|
||||
<td align="right"> 0</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>The IOB prefix marks whether a word is inside or outside of a chunk.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">I-</span></td>
|
||||
<td>inside the chunk</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">B-</span></td>
|
||||
<td>inside the chunk, preceding word is part of a different chunk</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">O </span></td>
|
||||
<td>not part of a chunk</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>A prepositional noun phrase (<span class="postag">PNP</span>) is a group of chunks starting with a preposition (<span class="postag">PP</span>) followed by noun phrases (<span class="postag">NP</span>), for example: <em>under the table</em>.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td class="smallcaps">Chunks</td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PNP</span></td>
|
||||
<td>prepositional noun phrase</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span><span class="postag"> </span></td>
|
||||
<td><em>as of today</em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Relation tags</h3>
|
||||
<p>Relation tags describe the relation between different chunks, and clarify the role of a chunk in that relation. The most common roles in a sentence are <span class="postag">SBJ</span> (subject noun phrase) and <span class="postag">OBJ</span> (object noun phrase). They link <span class="postag">NP</span> to <span class="postag">VP</span> chunks. The subject of a sentence is the person, thing, place or idea that is <em>doing</em> or <em>being</em> something. The object of a sentence is the person/thing affected by the action.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td class="smallcaps">Chunks</td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
<td align="right"><span class="smallcaps">%</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-SBJ</span></td>
|
||||
<td>sentence subject</td>
|
||||
<td><span class="postag">NP</span><span class="postag"> </span></td>
|
||||
<td><em><span style="text-decoration: underline;">the cat</span> sat on the mat<br /></em></td>
|
||||
<td align="right">35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-OBJ</span></td>
|
||||
<td>sentence object</td>
|
||||
<td><span class="postag">NP</span>+<span class="postag">SBAR</span></td>
|
||||
<td><em>the cat grabs <span style="text-decoration: underline;">the fish</span><br /></em></td>
|
||||
<td align="right">27</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-PRD </span></td>
|
||||
<td>predicate</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span>+<span class="postag">ADJP </span></td>
|
||||
<td><em>the cat feels <span style="text-decoration: underline;">warm and fuzzy</span><br /></em></td>
|
||||
<td align="right">7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-TMP</span></td>
|
||||
<td>temporal </td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span>+<span class="postag">ADVP</span></td>
|
||||
<td><em>arrive </em><em><span style="text-decoration: underline;">at noon</span> <br /></em></td>
|
||||
<td align="right">7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-CLR </span></td>
|
||||
<td>closely related</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span>+<span class="postag">ADVP </span></td>
|
||||
<td><em>work </em><em><span style="text-decoration: underline;">as a researcher</span> <br /></em></td>
|
||||
<td align="right">6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-LOC</span></td>
|
||||
<td>location </td>
|
||||
<td><span class="postag">PP </span></td>
|
||||
<td><em>live </em><em><span style="text-decoration: underline;">in Belgium</span> <br /></em></td>
|
||||
<td align="right">4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-DIR </span></td>
|
||||
<td>direction</td>
|
||||
<td><span class="postag">PP </span></td>
|
||||
<td><em>walk</em><em> <span style="text-decoration: underline;">towards</span> the door<br /></em></td>
|
||||
<td align="right">3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-EXT</span></td>
|
||||
<td>extent</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP </span></td>
|
||||
<td><em>drop <span style="text-decoration: underline;">10 %</span><br /></em></td>
|
||||
<td align="right">1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-PRP</span></td>
|
||||
<td>purpose</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">SBAR </span></td>
|
||||
<td><em>die <span style="text-decoration: underline;">as a result</span> of <br /></em></td>
|
||||
<td align="right">1</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Anchor tags</h3>
|
||||
<p>Anchor tags describe how prepositional noun phrases (<span class="postag">PNP</span>) are attached to other chunks in the sentence. For example, in the sentence, <em>I eat pizza with a fork</em>, the anchor of <em>with a fork</em> is <em>eat</em> because it answers the question: "In what way do I eat?"</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">A1</span></td>
|
||||
<td>anchor chunk that corresponds to <span class="postag">P1</span></td>
|
||||
<td><em><span style="text-decoration: underline;">eat</span> with a fork<br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">P1 </span></td>
|
||||
<td><span class="postag">PNP</span> that corresponds to <span class="postag">A1 </span></td>
|
||||
<td><em>eat <span style="text-decoration: underline;">with a fork</span><br /></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p> </p>
|
||||
<p><strong>Occurrence estimate </strong><span class="small"><br /></span></p>
|
||||
<p><span class="small">The given percentages for chunk and relations tags are based on tenfold cross validation on sections 10 to 19 of the WSJ Corpus of the Penn Treebank II by Sabine Buchholz, from which we derived a rough indication. The estimate means that if 100 chunk tags are found, about 50 would be <span class="postag">NP</span> tags and 35 would have an <span class="postag">SBJ</span> relation tag. About 30 of the chunks would be tagged as <span class="postag">NP-SBJ</span>, and 15 as <span class="postag">NP-OBJ</span>. </span></p>
|
||||
<p><span class="small"><span style="text-decoration: underline;">Reference</span>: Buchholz, S. (2002). <em>Memory-Based Grammatical Relation Finding</em>. ILK, Tilburg University.</span></p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,367 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-dev</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-dev" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-dev</a></div>
|
||||
<h1>pattern.dev</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1480" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">Pattern is a web mining module for the Python programming language.</span></p>
|
||||
<p><span class="big">Pattern is written in Python with extensions in JavaScript. The source code is hosted on GitHub. It is licensed under BSD, so it can be freely incorporated in proprietary applications. Contributions and donations are welcomed.</span></p>
|
||||
<p>There are six core modules in the <a href="pattern.html">pattern</a> package: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-text.html">text</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema.gif" alt="" width="620" height="180" /></p>
|
||||
<hr />
|
||||
<h2>Topics</h2>
|
||||
<ul>
|
||||
<li><a href="#contribute">Contributing</a></li>
|
||||
<li><a href="#dependencies">Dependencies</a></li>
|
||||
<li><a href="#documentation">Documentation</a></li>
|
||||
<li><a href="#code">Coding conventions</a></li>
|
||||
<li><a href="#quality">Code quality</a></li>
|
||||
<li><a href="#language">Language support</a></li>
|
||||
</ul>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="contribute"></a>Contribute</h2>
|
||||
<p>The source code is hosted on <a href="https://github.com/clips/pattern" target="_blank">GitHub</a> (see <a class="noexternal link-maintenance" href="http://www.github.com/clips/pattern" target="_blank">http://www.github.com/clips/pattern</a>). GitHub is an online project hosting service with version control. Version control tracks changes to the source code, i.e., it can be rolled back to an earlier state or merged with revisions from different contributors.</p>
|
||||
<p>To work on Pattern, create a <a href="http://help.github.com/fork-a-repo/" target="_blank">fork</a> of the project, a local copy of the source code that can be edited and updated by you alone. You can manage this copy with the free GitHub application (<a class="noexternal link-maintenance" href="http://windows.github.com/" target="_blank">windows</a> | <a class="noexternal link-maintenance" href="http://mac.github.com/" target="_blank">mac</a>). When you are ready, send us a <a href="http://help.github.com/send-pull-requests/" target="_blank">pull</a> request and we will integrate your changes in the main project.</p>
|
||||
<p>Let us know if you encounter a bug. We prefer if you create an <a href="https://github.com/clips/pattern/issues" target="_blank">issue</a> on GitHub, so that (until fixed) the problem is visible to all users of Pattern. There is a blue button for donations on the main documentation page. Please support the development if you use Pattern commercially.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="dependencies"></a>Dependencies</h2>
|
||||
<p>There are six core modules in the package:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Module</span></td>
|
||||
<td><span class="smallcaps">Functionality</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.web</td>
|
||||
<td>Asynchronous requests, web services, web crawler, HTML DOM parser.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.db</td>
|
||||
<td>Wrappers for databases (MySQL, SQLite) and CSV-files.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.text</td>
|
||||
<td>Base classes for parsers, parse trees and sentiment analysis.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.search</td>
|
||||
<td>Pattern matching algorithm for parsed text (syntax & semantics).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.vector</td>
|
||||
<td>Vector space model, clustering, classification.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.graph</td>
|
||||
<td>Graph analysis & visualization.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>There are two helper modules: pattern.metrics (statistics) and canvas.js (visualization).</p>
|
||||
<h3>Design philosophy</h3>
|
||||
<p>Pattern is written in Python, with JavaScript extensions for data visualization (graph.js and canvas.js). The package works out of the box. If C/C++ code is bundled for performance (e.g., LIBSVM), it includes precompiled binaries for all major platforms (Windows, Linux, Mac).</p>
|
||||
<p>Pattern modules are standalone. If a module imports another module, it fails silently if that module is not present. For example, pattern.text implements a parser that uses a Perceptron language model when pattern.vector is present, but falls back to a lexicon of known words and rules for unknown words if used by itself. A single module can have a lot of interdependent classes, hence the large __init__.py files.</p>
|
||||
<p>Pattern modules can bundle other BSD-licensed Python projects (e.g., BeautifulSoup). For larger projects or GPL-licensed projects, it provides code to map data structures.</p>
|
||||
<h3>Base classes</h3>
|
||||
<p>In pattern.web, each web service (e.g., Google, Twitter) inherits from <span class="inline_code">SearchEngine</span> and returns <span class="inline_code">Result</span> objects. Each MediaWiki web service (e.g., Wikipedia, Wiktionary) inherits from <span class="inline_code">MediaWiki</span>.</p>
|
||||
<p>In pattern.db, each database engine is wrapped by <span class="inline_code">Database</span>. It supports MySQL and SQLite, with future plans for MongoDB. See <span class="inline_code">Database</span><span class="inline_code">.connect()</span>, <span class="inline_code">escape()</span>, <span class="inline_code">_field_SQL()</span> and <span class="inline_code">_update()</span>.</p>
|
||||
<p>In pattern.text, each language inherits from <span class="inline_code">Parser</span>, having a lexicon of known words and an optional language model. Case studies for <a class="link-maintenance" href="http://www.clips.ua.ac.be/pages/using-wikicorpus-nltk-to-build-a-spanish-part-of-speech-tagger">Spanish</a> and <a class="link-maintenance" href="http://www.clips.ua.ac.be/pages/using-wiktionary-to-build-an-italian-part-of-speech-tagger">Italian</a> show how to train a <span class="inline_code">Lexicon</span>. A bundled pattern.vector example shows how to train a Perceptron <span class="inline_code">Model</span>.</p>
|
||||
<p>In pattern.vector, each classifier inherits from <span class="inline_code">Classifier</span> (e.g., KNN, SVM). Each clustering algorithm is available from <span class="inline_code">Model.cluster()</span>.</p>
|
||||
<p>In pattern.graph, subclasses of <span class="inline_code">Node</span> or <span class="inline_code">Edge</span> can be used with (subclasses of) <span class="inline_code">Graph</span> by setting the <span class="inline_code">base</span> parameter of <span class="inline_code">Graph.add_node()</span> and <span class="inline_code">add_edge()</span>. Each layout algorithm (e.g., force-based springs) inherits from <span class="inline_code">GraphLayout</span>.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="documentation"></a>Documentation</h2>
|
||||
<p>Each function or method has a docstring:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">def find(match=lambda item: False, list=[]):
|
||||
""" Returns the first item in the given list for which match(item) is True.
|
||||
"""
|
||||
for item in list:
|
||||
if match(item) is True:
|
||||
return item</pre></div>
|
||||
<p>The docstring provides a concise description of the type of input and output. In Pattern, a docstring starts with "Returns" (for a function) or "Yields" (for a property). Each function has a unit test, to verify that it is fit for use. Each function has an engaging example, bundled in the package or in the documentation.</p>
|
||||
<p>Pattern does not have a documentation framework. The documentation is written by hand and in constant revision. Please report spelling errors and examples with bugs.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="code"></a>Coding conventions</h2>
|
||||
<h3>Whitespace</h3>
|
||||
<p>The source code is not strict <a href="http://www.python.org/dev/peps/pep-0008/" target="_blank">PEP8</a>. For example, additional whitespace is used so that property assignments or inline comments are vertically aligned as a block:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">class Table(object):
|
||||
def __init__(self, name, database):
|
||||
""" A collection of rows with one or more fields of a certain type.
|
||||
"""
|
||||
self.database = database
|
||||
self.name = name
|
||||
self.fields = [] # List of field names (i.e., column names).
|
||||
self.schema = {} # Dictionary of (field, Schema)-items.
|
||||
self.default = {} # Default values for Table.insert().
|
||||
self.primary_key = None
|
||||
self._update()</pre></div>
|
||||
<p>Whitespace is sometimes used to align dictionary keys and values:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">url = URL('http://search.twitter.com/search.json?', method=GET, query={
|
||||
'q': query,
|
||||
'page': start,
|
||||
'rpp': min(count, 100)
|
||||
})</pre></div>
|
||||
<h3>Class and function names</h3>
|
||||
<p>Single words are preferred for class names. Compound terms use CamelCase, e.g., <span class="inline_code">SearchEngine</span> or <span class="inline_code">AsynchronousRequest</span>. Single, descriptive words are preferred for functions and methods. Compound terms use lowercase_with_underscore. If a method takes no arguments, it is a property:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">class AsynchronousRequest:
|
||||
@property
|
||||
def done(self):
|
||||
return not self._thread.isAlive() # We'd prefer "_thread.alive".</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">while not request.done:
|
||||
... </pre></div>
|
||||
<h3>Variable names</h3>
|
||||
<p>The source code uses single character names abundantly. For example, dictionary <span style="text-decoration: underline;">k</span>eys and <span style="text-decoration: underline;">v</span>alues are <span class="inline_code">k</span> and <span class="inline_code">v</span>, a string is <span class="inline_code">s</span>. This is done to make the structure of the algorithm stand out (i.e., the actual function and method calls):</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">def normalize(s, punctuation='!?.:;,()[] '):
|
||||
s = s.decode('utf-8')
|
||||
s = s.lower()
|
||||
s = s.strip(punctuation)
|
||||
return s</pre></div>
|
||||
<p>Frequently used single character variable names:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="smallcaps">Variable</span></td>
|
||||
<td><span class="smallcaps">Meaning</span></td>
|
||||
<td><span class="smallcaps">Example</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">a</span></td>
|
||||
<td>array, all</td>
|
||||
<td><span class="inline_code">a = [normalize(w) for w in words]</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">b</span></td>
|
||||
<td>boolean</td>
|
||||
<td><span class="inline_code">while b is False:</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">d</span></td>
|
||||
<td>distance, document</td>
|
||||
<td><span class="inline_code">d = distance(v1, v2)</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">e</span></td>
|
||||
<td>element</td>
|
||||
<td><span class="inline_code">e = html.find('#nav')</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">f</span></td>
|
||||
<td>file, filter, function</td>
|
||||
<td><span class="inline_code">f = open('data.csv', 'r')</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">i</span></td>
|
||||
<td>index</td>
|
||||
<td><span class="inline_code">for i in range(len(matrix)):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">j</span></td>
|
||||
<td>index</td>
|
||||
<td><span class="inline_code">for j in range(len(matrix[i])):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">k</span></td>
|
||||
<td>key</td>
|
||||
<td><span class="inline_code">for k in vector.keys():</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">n</span></td>
|
||||
<td>list length</td>
|
||||
<td><span class="inline_code">n = len(a)</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">p</span></td>
|
||||
<td>parser, pattern</td>
|
||||
<td><span class="inline_code">p = pattern.search.compile('NN')</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">q</span></td>
|
||||
<td>query</td>
|
||||
<td><span class="inline_code">for r in twitter.search(q):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">r</span></td>
|
||||
<td>result, row</td>
|
||||
<td><span class="inline_code">for r in csv('data.csv'):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">s</span></td>
|
||||
<td>string</td>
|
||||
<td><span class="inline_code">s = s.decode('utf-8').strip()</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">t</span></td>
|
||||
<td>time</td>
|
||||
<td><span class="inline_code">t = time.time() - t0</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">v</span></td>
|
||||
<td>value, vector</td>
|
||||
<td><span class="inline_code">for k, v in vector.items():</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">w</span></td>
|
||||
<td>word</td>
|
||||
<td><span class="inline_code">for i, w in enumerate(sentence.words):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">x</span></td>
|
||||
<td>horizontal position</td>
|
||||
<td><span class="inline_code">node.x = 0</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">y</span></td>
|
||||
<td>vertical position</td>
|
||||
<td><span class="inline_code">node.y = 0</span></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Dictionaries</h3>
|
||||
<p>The source code uses dictionaries abundantly. Dictionaries are fast for lookup. For example, pattern.vector represents vectors as sparse feature → weight dictionaries:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">v1 = document1.vector
|
||||
v2 = document2.vector
|
||||
cos = sum(v1.get(w,0) * f for w, f in v2.items()) / (norm(v1) * norm(v2) or 1)</pre></div>
|
||||
<p>Pattern algorithms are <a class="link-maintenance" href="pattern-metrics.html#profile">profiled</a> and optimized with caching mechanisms.</p>
|
||||
<h3>List comprehensions</h3>
|
||||
<p>The source code uses list comprehension abundantly. It is concise, and often faster than <span class="inline_code">map()</span>. However, it can also be harder to read (a comment should be added).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">def words(s, punctuation='!?.:;,()[] '):
|
||||
return [w.strip(punctuation) for w in s.split()]
|
||||
</pre></div>
|
||||
<h3>Ternary operator</h3>
|
||||
<p>Previous versions of Pattern supported Python 2.4, which does not have the ternary operator (single-line if). A part of the source code still uses a boolean condition to emulate it:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">s = s.lower() if lowercase is True else s # Python 2.5+</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">s = lowercase is True and s.lower() or s # Python 2.4</pre></div>
|
||||
<p>With boolean conditions, care must be taken for values <span class="inline_code">0</span>, <span class="inline_code">''</span>, <span class="inline_code">[]</span>, <span class="inline_code">()</span>, <span class="inline_code">{}</span>, and <span class="inline_code">None</span>, since they evaluate as <span class="inline_code">False</span> and trigger the or-clause.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="quality"></a>Code quality</h2>
|
||||
<p>The source code has about 25,000 lines of Python code (25% unit tests), 5,000 lines of JavaScript, and 20,000 lines of bundled dependencies (BeautifulSoup, PDFMiner, PyWordNet, LIBSVM, LIBLINEAR, etc.). To evaluate the code quality, <a href="http://www.logilab.org/857" target="_blank">pylint</a> can be used:</p>
|
||||
<div class="install">
|
||||
<pre class="gutter:false; light:true;">> cd pattern-2.x
|
||||
> pylint pattern --rcfile=.pylintrc</pre></div>
|
||||
<p>Important pylint ids are those starting with <span class="inline_code">E</span> (= possible bugs).</p>
|
||||
<p>The <span class="inline_code">.pylintrc</span> configuration file defines a number of custom settings:</p>
|
||||
<ul>
|
||||
<li>Instead of 80 characters per line, 100 characters are allowed.</li>
|
||||
<li>Ignore pylint id <span class="inline_code">C0103</span>, single-character variable names are allowed.</li>
|
||||
<li>Ignore pylint id <span class="inline_code">W0142</span>, <span class="inline_code">*args</span> and <span class="inline_code">**kwargs</span> are allowed.</li>
|
||||
<li>Ignore bundled dependencies.</li>
|
||||
</ul>
|
||||
<p>The source code scores about 7.38 / 10. A known issue is the absence of docstrings in unit tests.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="language"></a>Language support</h2>
|
||||
<p>Pattern currently has natural language processing tools (e.g., pattern.en, pattern.es) for most languages on the to-do list. There is no sentiment analysis yet for Spanish and German. Chinese is an open task.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Language</span></td>
|
||||
<td style="text-align: center;"><span class="smallcaps">Code</span></td>
|
||||
<td style="text-align: center;"><span class="smallcaps">Speakers</span></td>
|
||||
<td><span class="smallcaps">Example countries</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mandarin</td>
|
||||
<td style="text-align: center;"><span class="inline_code">cmn</span></td>
|
||||
<td style="text-align: center;">955M</td>
|
||||
<td>China + Taiwan (945), Singapore (3)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>Spanish</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">es</span></td>
|
||||
<td style="text-align: center;">350M</td>
|
||||
<td>Argentina (40), Colombia (40), Mexico (100), Spain (45)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>English</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">en</span></td>
|
||||
<td style="text-align: center;">340M</td>
|
||||
<td>Canada (30), United Kingdom (60), United States (300)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>German</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">de</span></td>
|
||||
<td style="text-align: center;">100M</td>
|
||||
<td>Austria (10), Germany (80), Switzerland (7)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>French</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">fr</span></td>
|
||||
<td style="text-align: center;">70M</td>
|
||||
<td>France (65), Côte d'Ivoire (20)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>Italian</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">it</span></td>
|
||||
<td style="text-align: center;">60M</td>
|
||||
<td>Italy (60)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>Dutch</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">nl</span></td>
|
||||
<td style="text-align: center;">25M</td>
|
||||
<td>The Netherlands (25), Belgium (5), Suriname (1)</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>There are two case studies that demonstrate how to build a pattern.xx language module:</p>
|
||||
<ul>
|
||||
<li><a href="http://www.clips.ua.ac.be/pages/using-wiktionary-to-build-an-italian-part-of-speech-tagger">Using Wiktionary to build an Italian part-of-speech tagger</a></li>
|
||||
<li><a href="http://www.clips.ua.ac.be/pages/using-wikicorpus-nltk-to-build-a-spanish-part-of-speech-tagger">Using Wikicorpus & NLTK to build a Spanish part-of-speech tagger</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,590 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-fr</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-fr" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-fr</a></div>
|
||||
<h1>pattern.fr</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1697" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">The pattern.fr module contains a fast part-of-speech tagger for French (identifies nouns, adjectives, verbs, etc. in a sentence), sentiment analysis, and tools for French verb conjugation and noun singularization &amp; pluralization.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema_fr.gif" alt="" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<p>The functions in this module take the same parameters and return the same values as their counterparts in <a href="pattern-en.html">pattern.en</a>. Refer to the documentation there for more details. </p>
|
||||
<h3>Noun singularization &amp; pluralization</h3>
|
||||
<p>For French nouns there is <span class="inline_code">singularize()</span> and <span class="inline_code">pluralize()</span>. The implementation uses a statistical approach with 93% accuracy for singularization and 92% for pluralization.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import singularize, pluralize
|
||||
>>>
|
||||
>>> print singularize('chats')
|
||||
>>> print pluralize('chat')
|
||||
|
||||
chat
|
||||
chats </pre></div>
|
||||
<h3>Verb conjugation</h3>
|
||||
<p>For French verbs there is <span class="inline_code">conjugate()</span>, <span class="inline_code">lemma()</span>, <span class="inline_code">lexeme()</span> and <span class="inline_code">tenses()</span>. The lexicon for verb conjugation contains about 1,750 common French verbs (constructed with Bob Salita's verb conjugation rules). For unknown verbs it will fall back to regular expressions with an accuracy of about 83%. </p>
|
||||
<p>French verbs have more tenses than English verbs. In particular, the plural differs for each person, and there are additional forms for the <span class="inline_code">FUTURE</span> tense, the <span class="inline_code">IMPERATIVE</span>, <span class="inline_code">CONDITIONAL</span> and <span class="inline_code">SUBJUNCTIVE</span> moods and the <span class="inline_code">PERFECTIVE</span> aspect:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import conjugate
|
||||
>>> from pattern.fr import INFINITIVE, PRESENT, PAST, SG, SUBJUNCTIVE, PERFECTIVE
|
||||
>>>
|
||||
>>> print conjugate('suis', INFINITIVE)
|
||||
>>> print conjugate('suis', PRESENT, 1, SG, mood=SUBJUNCTIVE)
|
||||
>>> print conjugate('suis', PAST, 3, SG)
|
||||
>>> print conjugate('suis', PAST, 3, SG, aspect=PERFECTIVE)
|
||||
|
||||
être
|
||||
sois
|
||||
était
|
||||
fut </pre></div>
|
||||
<p>For <span class="inline_code">PAST</span> tense + <span class="inline_code">PERFECTIVE</span> aspect we can also use <span class="inline_code">PRETERITE</span> (<em>passé simple</em>). For <span class="inline_code">PAST</span> tense + <span class="inline_code">IMPERFECTIVE</span> aspect we can also use <span class="inline_code">IMPERFECT</span> (<em>imparfait</em>):</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import conjugate
|
||||
>>> from pattern.fr import IMPERFECT, PRETERITE
|
||||
>>>
|
||||
>>> print conjugate('suis', IMPERFECT, 3, SG)
|
||||
>>> print conjugate('suis', PRETERITE, 3, SG)
|
||||
|
||||
était
|
||||
fut </pre></div>
|
||||
<p> The <span class="inline_code">conjugate()</span> function takes the following optional parameters:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="smallcaps">Tense</td>
|
||||
<td class="smallcaps">Person</td>
|
||||
<td class="smallcaps">Number</td>
|
||||
<td class="smallcaps">Mood</td>
|
||||
<td class="smallcaps">Aspect</td>
|
||||
<td class="smallcaps">Alias</td>
|
||||
<td class="smallcaps">Example</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">INFINITIVE</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">"inf"</td>
|
||||
<td><em>être</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">suis</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">es</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">est</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">sommes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">êtes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">sont</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"part"</td>
|
||||
<td><em>étant</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg!"</td>
|
||||
<td><em>sois</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl!"</td>
|
||||
<td><em>soyons</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl!"</td>
|
||||
<td><em>soyez</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg->"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">serais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg->"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">serais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg->"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">serait</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl->"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">serions</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl->"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">seriez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl->"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">seraient</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg?"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">sois</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sois</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg?"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">soit</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl?"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">soyons</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl?"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">soyez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl?"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">soient</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp"</td>
|
||||
<td><em>j' <span style="text-decoration: underline;">étais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">étais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">était</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">étions</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">étiez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">étaient</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"ppart"</td>
|
||||
<td><em>été</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp+"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">fus</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp+"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fus</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp+"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">fut</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl+"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">fûmes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl+"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">fûtes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl+"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">furent</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp?"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">fusse</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fusses</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp?"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">fût</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl?"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">fussions</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl?"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">fussiez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl?"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">fussent</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgf"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">serai</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgf"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">seras</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgf"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">sera</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1plf"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">serons</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2plf"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">serez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3plf"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">seront</span></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Instead of optional parameters, a single short alias, or <span class="inline_code">PARTICIPLE</span> or <span class="inline_code">PAST+PARTICIPLE</span> can also be given. With no parameters, the infinitive form of the verb is returned.</p>
|
||||
<p class="small"><span style="text-decoration: underline;">Reference</span><span>: Salita, B. (2011). <em>French Verb Conjugation Rules</em>. Retrieved from: </span><span><a class="noexternal" style="color: inherit;" href="http://fvcr.sourceforge.net/" target="_blank">http://fvcr.sourceforge.net</a>.</span></p>
|
||||
<h3>Attributive &amp; predicative adjectives</h3>
|
||||
<p>French adjectives inflect with an <span class="inline_code">-e</span>, <span class="inline_code">-s</span> or <span class="inline_code">-es</span> suffix depending on gender and number. There are many irregular cases (e.g., <em>curieux</em> → <em>une fille curieuse</em>). You can get the base form with the <span class="inline_code">predicative()</span> function. A statistical approach is used with an accuracy of 95%.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import predicative
|
||||
>>> print predicative('curieuse')
|
||||
|
||||
curieux </pre></div>
|
||||
<h3>Sentiment analysis</h3>
|
||||
<p class="example">For opinion mining there is <span class="inline_code">sentiment()</span>, which returns a (<span class="inline_code">polarity</span>, <span class="inline_code">subjectivity</span>)-tuple, based on a lexicon of adjectives. Polarity is a value between <span class="inline_code">-1.0</span> and <span class="inline_code">+1.0</span>, subjectivity between <span class="inline_code">0.0</span> and <span class="inline_code">1.0</span>. The accuracy is around 74% (P 0.77, R 0.73) for book reviews:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import sentiment
|
||||
>>> print sentiment('Un livre magnifique!')
|
||||
|
||||
(1.0, 1.0) </pre></div>
|
||||
<h3>Parser</h3>
|
||||
<p>For parsing there is <span class="inline_code">parse()</span>, <span class="inline_code">parsetree()</span> and <span class="inline_code">split()</span>. The <span class="inline_code">parse()</span> function annotates words in the given string with their part-of-speech <a class="link-maintenance" href="mbsp-tags.html">tags</a> (e.g., <span class="postag">NN</span> for nouns and <span class="postag">VB</span> for verbs). The <span class="inline_code">parsetree()</span> function takes a string and returns a tree of nested objects (<span class="inline_code">Text</span> → <span class="inline_code">Sentence</span> → <span class="inline_code">Chunk</span> → <span class="inline_code">Word</span>). The <span class="inline_code">split()</span> function takes the output of <span class="inline_code">parse()</span> and returns a <span class="inline_code">Text</span>. See the <span class="inline_code">pattern.en</span> documentation (<span class="link-maintenance" style="color: #78aaff;"><a style="color: #8caaff; outline-style: none !important; outline-width: initial !important; outline-color: initial !important;" href="pattern-en.html#tree">here</a></span>) for how to manipulate <span class="inline_code">Text</span> objects. </p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import parse, split
|
||||
>>>
|
||||
>>> s = parse(u"Le chat noir s'était assis sur le tapis.")
|
||||
>>> for sentence in split(s):
|
||||
>>>     print sentence
|
||||
|
||||
Sentence('Le/DT/B-NP/O chat/NN/I-NP/O noir/JJ/I-NP/O'
|
||||
"s'/PRP/B-NP/O était/VB/B-VP/O assis/VBN/I-VP/O"
|
||||
'sur/IN/B-PP/B-PNP le/DT/B-NP/I-PNP tapis/NN/I-NP/I-PNP ././O/O')
|
||||
</pre></div>
|
||||
<p>The parser is based on <a href="http://alpage.inria.fr/~sagot/lefff-en.html">Le<em>fff</em></a>. For words in Le<em>fff</em> that can have multiple part-of-speech tags, we used <a href="http://www.lexique.org/">Lexique</a> to find the most frequent POS-tag. </p>
|
||||
<p class="small"><span style="text-decoration: underline;">References</span>: </p>
|
||||
<p class="small">Sagot, B. (2010). The Le<em>fff</em>, a freely available and large-coverage morphological and syntactic lexicon for French. <em>Proceedings of LREC'10</em>.</p>
|
||||
<p class="small">New, B., Pallier, C., Ferrand, L. &amp; Matos, R. (2001). A lexical database for contemporary french: LEXIQUE. <em>L'année Psychologique</em>. </p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,431 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-graph</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-graph" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-graph</a></div>
|
||||
<h1>pattern.graph</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1392" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p class="big"><span style="font-size: 16px;">The pattern.graph module has tools for graph analysis (shortest path, centrality) and graph visualization in the browser. A graph is a network of nodes connected by edges. It can be used for example to study social networks or to model semantic relationships between concepts.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | graph.</p>
|
||||
<p><img style="border: 0px initial initial;" src="../g/pattern_schema.gif" alt="" width="620" height="180" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<ul>
|
||||
<li><a href="#node">Node</a></li>
|
||||
<li><a href="#edge">Edge</a></li>
|
||||
<li><a href="#graph">Graph</a></li>
|
||||
<li><a href="#layout">Graph layout</a></li>
|
||||
<li><a href="#utility">Graph adjacency</a></li>
|
||||
<li><a href="#canvas">Visualization</a> <span class="link-maintenance">(</span><a class="link-maintenance" href="#canvas"><span class="smallcaps link-maintenance">export</span></a><span class="link-maintenance">)</span></li>
|
||||
<li><a href="#javascript">graph.js</a></li>
|
||||
</ul>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="node"></a>Node</h2>
|
||||
<p>A <span class="inline_code">Node</span> is an element with a unique id (a string or <span class="inline_code">int</span>) in a graph. A graph is a network of nodes and edges (connections between nodes). For example, the World Wide Web (WWW) can be represented as a vast graph with websites as nodes, website URLs as node id's, and hyperlinks as edges. Graph analysis can then be used to find important nodes (i.e., popular websites) and the shortest path between them.</p>
|
||||
<p>A <span class="inline_code">Node</span> takes a number of optional parameters used to style the <a class="link-maintenance" href="#canvas">visualization</a> of the graph: <span class="inline_code">radius</span> (node size), <span class="inline_code">text</span>, <span class="inline_code">fill</span> and <span class="inline_code">stroke</span> (colors; each a tuple of <a href="http://en.wikipedia.org/wiki/RGBA">RGBA</a> values between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>), <span class="inline_code">strokewidth</span>, <span class="inline_code">font</span>, <span class="inline_code">fontsize</span> and <span class="inline_code">fontweight</span>.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">node = Node(id="", **kwargs)</pre><pre class="brush:python; gutter:false; light:true;">node.graph # Parent Graph.
|
||||
node.id # Unique string or int.
|
||||
node.links # List of Node objects.
|
||||
node.edges # List of Edge objects.
|
||||
node.edge(node, reverse=False)
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">node.weight # Eigenvector centrality (0.0-1.0).
|
||||
node.centrality # Betweenness centrality (0.0-1.0).
|
||||
node.degree # Degree centrality (0.0-1.0). </pre><pre class="brush:python; gutter:false; light:true;">node.x # 2D horizontal offset.
|
||||
node.y # 2D vertical offset.
|
||||
node.force # 2D Vector, updated by Graph.layout.
|
||||
node.radius # Default: 5
|
||||
node.fill # Default: None
|
||||
node.stroke # Default: (0,0,0,1)
|
||||
node.strokewidth # Default: 1
|
||||
node.text # Text object, or None.</pre><pre class="brush:python; gutter:false; light:true;">node.flatten(depth=1, traversable=lambda node, edge: True)
|
||||
</pre><ul>
|
||||
<li><span class="inline_code">Node.edge(node)</span> returns the <span class="inline_code">Edge</span> from this node to the given <span class="inline_code">node</span>, or <span class="inline_code">None</span>.</li>
|
||||
<li><span class="inline_code">Node.flatten()</span> returns a list with the node itself (<span class="inline_code">depth=0</span>), directly connected nodes (<span class="inline_code">depth=1</span>), nodes connected to those nodes (<span class="inline_code">depth=2</span>), and so on.</li>
|
||||
</ul>
|
||||
<p><span class="smallcaps">node weight and centrality</span></p>
|
||||
<p>A well-known task in graph analysis is measuring how important or <em>central</em> each node in the graph is. The pattern.graph module has three centrality measurements, adopted from <a href="http://networkx.lanl.gov/">NetworkX</a>.</p>
|
||||
<p><span class="inline_code">Node.weight</span> is the node's <em>eigenvector</em> centrality (= incoming traffic) as a value between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>. Nodes with more (indirect) incoming edges have a higher weight. For example, in the WWW, popular websites are those that are often linked to, where the popularity of the referring websites is taken into account.</p>
|
||||
<p><span class="inline_code">Node.centrality</span> is the node's <em>betweenness</em> centrality (= passing traffic) as a value between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>. Nodes that occur more frequently in paths between other nodes have a higher betweenness. They are often found at the intersection of different clusters of nodes (e.g., like a broker or a bridge).</p>
|
||||
<p><span class="inline_code">Node.degree</span> is the node's <em>degree</em> centrality (= local traffic) as a value between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>. Nodes with more edges have a higher degree.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="edge"></a>Edge</h2>
|
||||
<p>An <span class="inline_code">Edge</span> is a connection between two nodes. Its <span class="inline_code">weight</span> defines the importance of the connection. Edges with a higher weight are preferred when traversing the path between two (indirectly) connected nodes.</p>
|
||||
<p>An <span class="inline_code">Edge</span> takes optional parameters <span class="inline_code">stroke</span> (a tuple of <a href="http://en.wikipedia.org/wiki/RGBA">RGBA</a> values between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>) and <span class="inline_code">strokewidth</span>, which can be used to style the graph <a class="link-maintenance" href="#canvas">visualization</a>.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">edge = Edge(node1, node2, weight=0.0, length=1.0, type=None, **kwargs)</pre><pre class="brush:python; gutter:false; light:true;">edge.node1 # Node (sender).
|
||||
edge.node2 # Node (receiver).
|
||||
edge.weight # Connection strength.
|
||||
edge.length # Length modifier for the visualization.
|
||||
edge.type # Useful in semantic networks.
|
||||
edge.stroke # Default: (0,0,0,1)
|
||||
edge.strokewidth # Default: 1 </pre><p class="smallcaps"><br />directed graph</p>
|
||||
<p>An edge can be traversed in both directions: from <span class="inline_code">node1</span> → <span class="inline_code">node2</span>, and from <span class="inline_code">node2</span> → <span class="inline_code">node1</span>. The <span class="inline_code">Graph.shortest_path()</span> and <span class="inline_code">Graph.betweenness_centrality()</span> methods have a <span class="inline_code">directed</span> parameter which can be set to <span class="inline_code">True</span>, so that edges are only traversed from <span class="inline_code">node1</span> → <span class="inline_code">node2</span>. This is called a directed graph. Evidently, it produces different shortest paths and node weights.</p>
|
||||
<p>Two nodes can be connected by at most two edges (one in each direction). Otherwise, <span class="inline_code">Graph.add_edge()</span> simply returns the edge that already exists between the given nodes.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="graph"></a>Graph</h2>
|
||||
<p>A <span class="inline_code">Graph</span> is a network of nodes connected by edges, with methods for finding paths between (indirectly) connected nodes.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">graph = Graph(layout=SPRING, distance=10.0)</pre><pre class="brush:python; gutter:false; light:true;">graph[id] # Node with given id (Graph is a subclass of dict).
|
||||
graph.nodes # List of Node objects.
|
||||
graph.edges # List of Edge objects.
|
||||
graph.density # < 0.35 => sparse, > 0.65 => dense
|
||||
graph.layout # GraphSpringLayout.
|
||||
graph.distance # GraphSpringLayout spacing.
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">graph.add_node(id) # Creates + returns new Node.
|
||||
graph.add_edge(id1, id2) # Creates + returns new Edge.
|
||||
graph.remove(node) # Removes given Node + edges.
|
||||
graph.remove(edge) # Removes given Edge.
|
||||
graph.prune(depth=0) # Removes nodes + edges if len(node.links) <= depth.
|
||||
graph.node(id) # Returns node with given id.
|
||||
graph.edge(id1, id2) # Returns edge connecting the given nodes.
|
||||
graph.copy(nodes=ALL) # Returns a new Graph.
|
||||
graph.split() # Returns a list of (unconnected) graphs.
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">graph.eigenvector_centrality() # Updates all Node.weight values.
|
||||
graph.betweenness_centrality() # Updates all Node.centrality values. </pre><pre class="brush:python; gutter:false; light:true;">graph.shortest_path(node1, node2, heuristic=None, directed=False)
|
||||
graph.shortest_paths(node, heuristic=None, directed=False)
|
||||
graph.paths(node1, node2, length=4)
|
||||
graph.fringe(depth=0, traversable=lambda node, edge: True)
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">graph.update(iterations=10, weight=10, limit=0.5)</pre><ul>
|
||||
<li><span class="inline_code"><span><span class="inline_code">Graph.add_node()</span></span></span> takes an id + any optional parameter of <span><span class="inline_code">Node</span></span>.</li>
|
||||
<li><span class="inline_code">Graph.add_edge()</span> takes two id's + any optional parameter of <span class="inline_code">Edge</span>.<br />Both methods have an optional <span class="inline_code">base</span> parameter that defines the subclass of <span class="inline_code">Node</span> or <span class="inline_code">Edge</span> to use.</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><span class="inline_code">Graph.prune()</span> removes all nodes whose number of (undirected) connections is less than or equal to <span class="inline_code">depth</span>.</li>
|
||||
<li><span class="inline_code">Graph.copy()</span> returns a new <span class="inline_code">Graph</span> from the given list of nodes.</li>
|
||||
<li><span class="inline_code">Graph.split()</span> returns a list of unconnected subgraphs.</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><span class="inline_code"><span><span class="inline_code">Graph.paths()</span></span></span> returns all paths (each a list of nodes) <= <span class="inline_code">length</span> connecting two given nodes.</li>
|
||||
<li><span class="inline_code"><span><span class="inline_code">Graph.shortest_path()</span></span></span> returns a list of nodes connecting the two given nodes<span class="inline_code"><span>.</span><br /></span></li>
|
||||
<li><span class="inline_code">Graph.shortest_paths()</span> returns a dictionary of node <span style="line-height: normal;">→</span> shortest path.<br />The optional <span class="inline_code">heuristic</span> function takes two node id's and returns a penalty (<span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>) for traversing their edges. With <span class="inline_code">directed=True</span>, edges are only traversable in one direction.</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><span class="inline_code">Graph.fringe()</span> returns a list of <em>leaf</em> nodes.<br />With <span class="inline_code">depth=0</span>, returns the nodes with one edge.<br />With <span class="inline_code">depth=1</span>, returns the nodes with one edge + the connected nodes, etc.</li>
|
||||
</ul>
|
||||
<p>For example:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.graph import Graph
|
||||
>>>
|
||||
>>> g = Graph()
|
||||
>>> for n1, n2 in (
|
||||
>>> ('cat', 'tail'), ('cat', 'purr'), ('purr', 'sound'),
|
||||
>>> ('dog', 'tail'), ('dog', 'bark'), ('bark', 'sound')):
|
||||
>>> g.add_node(n1)
|
||||
>>> g.add_node(n2)
|
||||
>>> g.add_edge(n1, n2, weight=0.0, type='is-related-to')
|
||||
>>>
|
||||
>>> for n in sorted(g.nodes, key=lambda n: n.weight):
|
||||
>>> print '%.2f' % n.weight, n
|
||||
|
||||
0.00 Node(id='cat')
|
||||
0.00 Node(id='dog')
|
||||
0.07 Node(id='purr')
|
||||
0.07 Node(id='bark')
|
||||
0.15 Node(id='tail')
|
||||
1.00 Node(id='sound')
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> for n in g.shortest_path('purr', 'bark'):
|
||||
>>> print n
|
||||
|
||||
Node(id='purr')
|
||||
Node(id='sound')
|
||||
Node(id='bark')
|
||||
</pre></div>
|
||||
<table border="0">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<p>When sorted by <span class="inline_code">Node.weight</span> (i.e., eigenvector centrality), <em>sound</em> is the most important node in the network. This can be explained by observing the visualization on the right. Most nodes (indirectly) connect to <em>sound</em> or <em>tail</em>. No nodes connect to <em>dog</em> or <em>cat</em>, so these are the least important in the network (weight <span class="inline_code">0.0</span>).</p>
|
||||
<p>By default, nodes with a higher weight will have a larger radius in the visualization.</p>
|
||||
</td>
|
||||
<td><img src="../g/pattern_graph3.jpg" alt="" width="170" height="155" /></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="layout"></a>Graph layout</h2>
|
||||
<p>A <span class="inline_code">GraphLayout</span> updates node positions (<span class="inline_code">Node.x</span>, <span class="inline_code">Node.y</span>) iteratively each time <span class="inline_code">GraphLayout.update()</span> is called. The pattern.graph module currently has one implementation: <span class="inline_code">GraphSpringLayout</span>, which uses a force-based algorithm where edges are regarded as springs. Connected nodes are pulled closer together (attraction) while other nodes are pushed further apart (repulsion).</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">layout = GraphSpringLayout(graph)</pre><pre class="brush:python; gutter:false; light:true;">layout.graph # Graph owner.
|
||||
layout.iterations # Starts at 0, +1 each update().
|
||||
layout.bounds # (x, y, width, height)-tuple.</pre><pre class="brush:python; gutter:false; light:true;">layout.k # Force constant (4.0)
|
||||
layout.force # Force multiplier (0.01)
|
||||
layout.repulsion # Maximum repulsion radius (50)</pre><pre class="brush:python; gutter:false; light:true;">layout.update(weight=10.0, limit=0.5) # weight = Edge.weight multiplier.
|
||||
layout.reset()
|
||||
layout.copy(graph)</pre><p><span class="small"><span style="text-decoration: underline;">Reference</span>: Hellesoy, A. & Hoover, D. (2006). http://ajaxian.com/archives/new-javascriptcanvas-graph-library</span></p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="utility"></a>Graph adjacency</h2>
|
||||
<p>The pattern.graph module has a number of functions that can be used to modify graph edges:</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">unlink(graph, node1, node2=None)</pre><pre class="brush:python; gutter:false; light:true;">redirect(graph, node1, node2)</pre><pre class="brush:python; gutter:false; light:true;">cut(graph, node)</pre><pre class="brush:python; gutter:false; light:true;">insert(graph, node, a, b)</pre><ul>
|
||||
<li style="margin-bottom: 0.3em;"><span class="inline_code">unlink()</span> removes the edge between <span class="inline_code">node1</span> and <span class="inline_code">node2</span>. <br />If only <span class="inline_code">node1</span> is given, removes all edges to + from it. This does not remove <span class="inline_code">node1</span> from the graph.</li>
|
||||
<li style="margin-bottom: 0.3em;"><span class="inline_code">redirect()</span> connects <span class="inline_code">node1</span>'s edges to <span class="inline_code">node2</span> and removes <span class="inline_code">node1</span>.<br />If <span class="inline_code">A</span>, <span class="inline_code">B</span>, <span class="inline_code">C</span>, <span class="inline_code">D</span> are nodes and <span class="inline_code">A</span> → <span class="inline_code">B</span> and <span class="inline_code">C</span> → <span class="inline_code">D</span>, and we redirect <span class="inline_code">A</span> to <span class="inline_code">C</span>, then <span class="inline_code">C</span> → <span class="inline_code">B</span> and <span class="inline_code">C</span> → <span class="inline_code">D</span>.</li>
|
||||
<li style="margin-bottom: 0.3em;"><span class="inline_code">cut()</span> removes the given <span class="inline_code">node</span> and connects the surrounding nodes. <br />If <span class="inline_code">A</span>, <span class="inline_code">B</span>, <span class="inline_code">C</span>, <span class="inline_code">D</span> are nodes and <span class="inline_code">A</span> <span>→</span> <span class="inline_code">B</span> and <span class="inline_code">B</span> <span>→</span> <span class="inline_code">C</span> and <span class="inline_code">B</span> <span>→</span> <span class="inline_code">D</span>, and we cut <span class="inline_code">B</span>, then <span class="inline_code">A</span> <span>→</span> <span class="inline_code">C</span> and <span class="inline_code">A</span> <span>→</span> <span class="inline_code">D</span>.</li>
|
||||
<li><span class="inline_code">insert()</span> inserts the given <span class="inline_code">node</span> between node <span class="inline_code">a</span> and node <span class="inline_code">b</span>. <br />If <span class="inline_code">A</span>, <span class="inline_code">B</span>, <span class="inline_code">C</span> are nodes and <span class="inline_code">A</span> <span>→</span> <span class="inline_code">B</span>, and we insert <span class="inline_code">C</span>, then <span class="inline_code">A</span> <span>→</span> <span class="inline_code">C</span> and <span class="inline_code">C</span> <span>→</span> <span class="inline_code">B</span>.</li>
|
||||
</ul>
|
||||
<h3>Edge adjacency map</h3>
|
||||
<p><span style="font-variant: normal;">The <span class="inline_code">adjacency()</span> function returns a map of linked nodes:</span><span class="smallcaps"><br /></span></p>
|
||||
<pre class="brush:python; gutter:false; light:true;">adjacency(graph,
|
||||
directed = False,
|
||||
reversed = False,
|
||||
stochastic = False,
|
||||
heuristic = lambda node1, node2: 0)</pre><p>The return value is an <span class="inline_code">{id1:</span> <span class="inline_code">{id2:</span> <span class="inline_code">weight}}</span> dictionary with <span class="inline_code">Node.id</span>'s as keys, where each value is a dictionary of connected <span class="inline_code">Node.id</span>'s <span style="line-height: 18px;">→</span> <span class="inline_code">Edge.weight</span>.</p>
|
||||
<p>If <span class="inline_code">directed=True</span>, edges are only traversable in one direction. If <span class="inline_code">stochastic=True</span>, the edge weights for all neighbors of a given node sum to <span class="inline_code">1.0</span>. The optional <span class="inline_code">heuristic</span> function takes two node id's and returns an additional cost (<span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>) for traversing their edges. </p>
|
||||
<h3>Edge traversal</h3>
|
||||
<p>The <span class="inline_code">bfs()</span> function (breadth-first search) visits all nodes connected to the given <span class="inline_code">node</span>. <br />The <span class="inline_code">dfs()</span> function (depth-first search) visits all nodes connected to the given <span class="inline_code">node</span> depth-first, i.e., as far as possible along each path before backtracking.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">bfs(node, visit=lambda node: False, traversable=lambda node, edge: True)</pre><pre class="brush:python; gutter:false; light:true;">dfs(node, visit=lambda node: False, traversable=lambda node, edge: True)
|
||||
</pre><p>The given <span class="inline_code">visit</span> function is called with each visited node. Traversal will stop if it returns <span class="inline_code">True</span>, and subsequently <span class="inline_code">bfs()</span> or <span class="inline_code">dfs()</span> will return <span class="inline_code">True</span>.</p>
|
||||
<p>The given <span class="inline_code">traversable</span> function takes the visited <span class="inline_code">Node</span> and an <span class="inline_code">Edge</span> and returns <span class="inline_code">True</span> if we are allowed to follow this connection to the next node. For example, the traversable for directed edges:</p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> def directed(node, edge):
|
||||
>>> return node.id == edge.node1.id
|
||||
>>>
|
||||
>>> dfs(g, traversable=directed) </pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="canvas"></a>Visualization</h2>
|
||||
<p>The pattern.graph module has a JavaScript counterpart (graph.js) that can be used to visualize a graph in a web page, as a HTML <canvas> element. The HTML <canvas> element allows dynamic, scriptable rendering of 2D shapes and bitmap images (see also Pattern's <a class="link-maintenance" href="pattern-canvas.html">canvas.js</a>).</p>
|
||||
<p><span class="inline_code">Graph.export()</span> creates a new folder at the given <span class="inline_code">path</span> with an index.html (the visualization), a style.css, graph.js and canvas.js. The optional parameter <span class="inline_code">javascript</span> defines the URL path to graph.js and canvas.js (which will not be included in this case).</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">graph.export(path, encoding='utf-8', **kwargs)</pre><div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.graph import Graph
|
||||
>>>
|
||||
>>> g = Graph()
|
||||
>>> for n1, n2 in (
|
||||
>>> ('cat', 'tail'), ('cat', 'purr'), ('purr', 'sound'),
|
||||
>>> ('dog', 'tail'), ('dog', 'bark'), ('bark', 'sound')):
|
||||
>>> g.add_node(n1)
|
||||
>>> g.add_node(n2)
|
||||
>>> g.add_edge(n1, n2, weight=0.0, type='is-related-to')
|
||||
>>>
|
||||
>>> g.export('sound', directed=True)</pre></div>
|
||||
<p>Nodes and edges will be styled according to their <span class="inline_code">fill</span>, <span class="inline_code">stroke</span>, and <span class="inline_code">strokewidth</span> properties.</p>
|
||||
<p>The following parameters can be used to customize the visualization:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Parameter</span></td>
|
||||
<td><span class="smallcaps">Default</span></td>
|
||||
<td><span class="smallcaps">Description</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">javascript</span></td>
|
||||
<td><span class="inline_code">''</span></td>
|
||||
<td>Path to canvas.js and graph.js.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">stylesheet</span></td>
|
||||
<td class="inline_code"><span class="inline_code">INLINE</span></td>
|
||||
<td>Path to CSS: <span class="inline_code">INLINE</span>, <span class="inline_code">DEFAULT</span> (generates style.css), <span class="inline_code">None</span> or path.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">title</span></td>
|
||||
<td><span class="inline_code">'Graph'</span></td>
|
||||
<td>HTML <span class="inline_code"><span><span class="inline_code"><title>Graph</title></span>.</span></span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">id</span></td>
|
||||
<td><span class="inline_code">'graph'</span></td>
|
||||
<td>HTML <span class="inline_code"><div</span> <span class="inline_code">id="graph"></span> contains the <span class="inline_code"><canvas></span>.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">ctx</span></td>
|
||||
<td><span class="inline_code">'canvas.element'</span></td>
|
||||
<td>HTML <span class="inline_code"><canvas></span> element to use for drawing.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">width</span></td>
|
||||
<td><span class="inline_code">700</span></td>
|
||||
<td>Canvas width in pixels.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">height</span></td>
|
||||
<td><span class="inline_code">500</span></td>
|
||||
<td>Canvas height in pixels.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">frames</span></td>
|
||||
<td><span class="inline_code">500</span></td>
|
||||
<td>Number of frames of animation.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">ipf</span></td>
|
||||
<td><span class="inline_code">2</span></td>
|
||||
<td><span class="inline_code">GraphLayout.update()</span> iterations per frame.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">directed</span></td>
|
||||
<td><span class="inline_code">False</span></td>
|
||||
<td>Visualize eigenvector centrality as an edge arrow?</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">weighted</span></td>
|
||||
<td><span class="inline_code">False</span></td>
|
||||
<td>Visualize betweenness centrality as a node shadow?</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">pack</span></td>
|
||||
<td><span class="inline_code">True</span></td>
|
||||
<td>Shorten leaf edges + add node weight to node radius.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">distance</span></td>
|
||||
<td><span class="inline_code">graph.distance</span></td>
|
||||
<td>Average edge length.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">k</span></td>
|
||||
<td><span class="inline_code">graph.k</span></td>
|
||||
<td>Force constant.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">force</span></td>
|
||||
<td><span class="inline_code">graph.force</span></td>
|
||||
<td>Force dampener.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">repulsion</span></td>
|
||||
<td><span class="inline_code">graph.repulsion</span></td>
|
||||
<td>Force radius.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">href</span></td>
|
||||
<td><span class="inline_code">{}</span></td>
|
||||
<td>Dictionary of <span class="inline_code">Node.id</span> => URL.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">css</span></td>
|
||||
<td><span class="inline_code">{}</span></td>
|
||||
<td>Dictionary of <span class="inline_code">Node.id</span> => CSS classname.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>To export a static visualization, use <span class="inline_code">frames=1</span> and <span class="inline_code">ipf=0</span>.<br /> </p>
|
||||
<p class="smallcaps">Server-side scripting</p>
|
||||
<p><span class="inline_code">Graph.serialize()</span> returns a string with (a portion of) the HTML, CSS and JavaScript source code of the visualization. It can be used to serve a dynamic web page. With <span class="inline_code">type=CANVAS</span>, it returns a HTML string with a <span class="inline_code"><div</span> <span class="inline_code">id="graph"></span> that contains the canvas.js animation. With <span class="inline_code">type=DATA</span>, it returns a Javascript string that initializes the <span class="inline_code">Graph</span> in variable <span class="inline_code">g</span> (which will draw to <span class="inline_code">ctx</span>).</p>
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">graph.serialize(type=HTML, **kwargs) # HTML | CSS | CANVAS | DATA</pre><div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> import cherrypy
|
||||
>>>
|
||||
>>> class Visualization(object):
|
||||
>>> def index(self):
|
||||
>>> return (
|
||||
>>> '<html>'
|
||||
>>> '<head>'
|
||||
>>> '<script src="canvas.js"></script>'
|
||||
>>> '<script src="graph.js"></script>'
|
||||
>>> '</head>'
|
||||
>>> '<body>' + g.serialize(CANVAS, directed=True) +
|
||||
>>> '</body>'
|
||||
>>> '</html>'
|
||||
>>> )
|
||||
>>> index.exposed = True
|
||||
>>>
|
||||
>>> cherrypy.quickstart(Visualization())</pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="javascript"></a>graph.js</h2>
|
||||
<p>Below is a standalone demonstration of graph.js, without using <span class="inline_code">export()</span> or canvas.js. The <span class="inline_code">Graph.loop()</span> method fires the spring layout algorithm (<span class="link-maintenance"><a href="http://www.clips.ua.ac.be/media/pattern-graph/random" target="_blank">view live demo</a></span>).</p>
|
||||
<p><img class="border" src="../g/pattern_graph4.jpg" alt="" width="610" height="390" /></p>
|
||||
<div class="example">
|
||||
<pre class="brush:xml; gutter:false; light:true;"><!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<style>
|
||||
#graph { display: block; position: relative; overflow: hidden; }
|
||||
#graph .node-label { font: 11px sans-serif; }
|
||||
</style>
|
||||
<script src="graph.js"></script>
|
||||
<script>
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush: jscript;gutter: false; light: true; fontsize: 100; first-line: 1; "> function spring() {
|
||||
SHADOW = 0.65 // slow...
|
||||
g = new Graph(document.getElementById("_ctx"));
|
||||
// Random nodes.
|
||||
for (var i=0; i < 50; i++) {
|
||||
g.addNode(i+1);
|
||||
}
|
||||
// Random edges.
|
||||
for (var j=0; j < 75; j++) {
|
||||
var n1 = choice(g.nodes);
|
||||
var n2 = choice(g.nodes);
|
||||
g.addEdge(n1, n2, {weight: Math.random()});
|
||||
}
|
||||
g.prune(0);
|
||||
g.betweennessCentrality();
|
||||
g.eigenvectorCentrality();
|
||||
g.loop({frames:500, fps:30, ipf:2, weighted:0.5, directed:true});
|
||||
}
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:xml; gutter:false; light:true;"> </script>
|
||||
</head>
|
||||
<body onload="spring();">
|
||||
<div id="graph" style="width:700px; height:500px;">
|
||||
<canvas id="_ctx" width="700" height="500"></canvas>
|
||||
</div>
|
||||
</body>
|
||||
</html> </pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2>See also</h2>
|
||||
<ul>
|
||||
<li><a href="http://gephi.org/" target="_blank">Gephi</a> (GPL): ne<span>twork analysis & visualization GUI.</span></li>
|
||||
<li><a href="http://networkx.lanl.gov/" target="_blank">NetworkX</a> (BSD): <span>network analysis toolkit for Python + NumPy.</span></li>
|
||||
<li><a href="http://www.cityinabottle.org/nodebox/" target="_blank">NodeBox</a> (BSD): g<span>raphics toolkit for Python + OpenGL.</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,613 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-it</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-it" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-it</a></div>
|
||||
<h1>pattern.it</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1698" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">The pattern.it module contains a fast part-of-speech tagger for Italian (identifies nouns, adjectives, verbs, etc. in a sentence) and tools for Italian verb conjugation and noun singularization & pluralization.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema_it.gif" alt="" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<p>The functions in this module take the same parameters and return the same values as their counterparts in <a href="pattern-en.html">pattern.en</a>. Refer to the documentation there for more details. </p>
|
||||
<h3>Gender</h3>
|
||||
<p>Italian nouns and adjectives inflect according to gender. The <span class="inline_code">gender()</span> function predicts the gender (<span class="inline_code">MALE</span>, <span class="inline_code">FEMALE</span>, <span class="inline_code">PLURAL</span>) of a given noun with about 92% accuracy: </p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> from pattern.it import gender, MALE, FEMALE, PLURAL
|
||||
>>> print gender('gatti')
|
||||
|
||||
(MALE, PLURAL)</pre></div>
|
||||
<h3>Article</h3>
|
||||
<p>The <span class="inline_code">article()</span> function returns the article (<span class="inline_code">INDEFINITE</span> or <span class="inline_code">DEFINITE</span>) inflected by gender (e.g., <em><span style="text-decoration: underline;">il</span> gatto</em> → <em><span style="text-decoration: underline;">i</span> gatti</em>).</p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> from pattern.it import article, DEFINITE, MALE, PLURAL
|
||||
>>> print article('gatti', DEFINITE, gender=(MALE, PLURAL))
|
||||
|
||||
i</pre></div>
|
||||
<h3>Noun singularization & pluralization</h3>
|
||||
<p>For Italian nouns there is <span class="inline_code">singularize()</span> and <span class="inline_code">pluralize()</span>. The implementation is slightly less robust than the English version (accuracy 84% for singularization and 93% for pluralization).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import singularize, pluralize
|
||||
>>>
|
||||
>>> print singularize('gatti')
|
||||
>>> print pluralize('gatto')
|
||||
|
||||
gatto
|
||||
gatti </pre></div>
|
||||
<h3>Verb conjugation</h3>
|
||||
<p>For Italian verbs there is <span class="inline_code">conjugate()</span>, <span class="inline_code">lemma()</span>, <span class="inline_code">lexeme()</span> and <span class="inline_code">tenses()</span>. The lexicon for verb conjugation contains about 1,250 common Italian verbs, mined from Wiktionary. For unknown verbs it will fall back to a rule-based approach with an accuracy of about 86%. </p>
|
||||
<p>Italian verbs have more tenses than English verbs. In particular, the plural differs for each person, and there are additional forms for the <span class="inline_code">FUTURE</span> tense, the <span class="inline_code">IMPERATIVE</span>, <span class="inline_code">CONDITIONAL</span> and <span class="inline_code">SUBJUNCTIVE</span> mood and the <span class="inline_code">PERFECTIVE</span> aspect:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import conjugate
|
||||
>>> from pattern.it import INFINITIVE, PRESENT, PAST, SG, SUBJUNCTIVE, PERFECTIVE
|
||||
>>>
|
||||
>>> print conjugate('sono', INFINITIVE)
|
||||
>>> print conjugate('sono', PRESENT, 1, SG, mood=SUBJUNCTIVE)
|
||||
>>> print conjugate('sono', PAST, 3, SG)
|
||||
>>> print conjugate('sono', PAST, 3, SG, aspect=PERFECTIVE)
|
||||
|
||||
essere
|
||||
sia
|
||||
era
|
||||
fu </pre></div>
|
||||
<p>For <span class="inline_code">PAST</span> tense + <span class="inline_code">PERFECTIVE</span> aspect we can also use <span class="inline_code">PRETERITE</span> (<em>passato remoto</em>). For <span class="inline_code">PAST</span> tense + <span class="inline_code">IMPERFECTIVE</span> aspect we can also use <span class="inline_code">IMPERFECT</span> (<em>imperfetto</em>).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import conjugate
|
||||
>>> from pattern.it import IMPERFECT, PRETERITE
|
||||
>>>
|
||||
>>> print conjugate('sono', IMPERFECT, 3, SG)
|
||||
>>> print conjugate('sono', PRETERITE, 3, SG)
|
||||
|
||||
era
|
||||
fu </pre></div>
|
||||
<p> The <span class="inline_code">conjugate()</span> function takes the following optional parameters:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="smallcaps">Tense</td>
|
||||
<td class="smallcaps">Person</td>
|
||||
<td class="smallcaps">Number</td>
|
||||
<td class="smallcaps">Mood</td>
|
||||
<td class="smallcaps">Aspect</td>
|
||||
<td class="smallcaps">Alias</td>
|
||||
<td class="smallcaps">Example</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">INFINITIVE</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">"inf"</td>
|
||||
<td><em>essere</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sono</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sei</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">è</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">siamo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">siete</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">sono</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"part"</td>
|
||||
<td><em>essendo</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg!"</td>
|
||||
<td><em>sii</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg!"</td>
|
||||
<td><em>sia</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl!"</td>
|
||||
<td><em>siamo</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl!"</td>
|
||||
<td><em>siate</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl!"</td>
|
||||
<td><em>siano</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg?"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sia</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sia</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg?"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">sia</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl?"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">siamo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl?"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">siate</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl?"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">siano</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">ero</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">eri</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">era</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">e</span><span style="text-decoration: underline;">ravamo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">eravate</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">erano</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"ppart"</td>
|
||||
<td><em>stato</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp+"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">fui</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp+"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fosti</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp+"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">fu</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl+"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">fummo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl+"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">foste</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl+"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">furono</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp?"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">fossi</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fossi</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp?"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">fosse</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl?"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">fossimo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl?"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">foste</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl?"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">fossero</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgf"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sarò</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgf"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sarai</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgf"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">sarà</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1plf"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">saremo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2plf"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">sarete</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3plf"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">saranno</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg->"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sarei</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg->"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">saresti</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg->"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">sarebbe</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl->"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">saremmo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl->"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">sareste</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl->"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">sarebbero</span></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Instead of optional parameters, a single short alias, or <span class="inline_code">PARTICIPLE</span> or <span class="inline_code">PAST+PARTICIPLE</span> can also be given. With no parameters, the infinitive form of the verb is returned.</p>
|
||||
<h3>Attributive & predicative adjectives </h3>
|
||||
<p>Italian adjectives inflect with suffixes <span class="inline_code">-o</span> → <span class="inline_code">-i</span> (masculine) and <span class="inline_code">-a</span> → <span class="inline_code">-e</span> (feminine), with some exceptions (e.g., <em>grande</em> → <em>i grandi felini</em>). You can get the base form with the <span class="inline_code">predicative()</span> function. A statistical approach is used with an accuracy of 88%.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import predicative
|
||||
>>> print predicative('grandi')
|
||||
|
||||
grande </pre></div>
|
||||
<h3>Parser</h3>
|
||||
<p>For parsing there is <span class="inline_code" style="font-family: Courier, monospace; font-size: 12px;">parse()</span>, <span class="inline_code">parsetree()</span> and <span class="inline_code" style="font-family: Courier, monospace; font-size: 12px;">split()</span>. The <span class="inline_code">parse()</span> function annotates words in the given string with their part-of-speech <a class="link-maintenance" href="mbsp-tags.html">tags</a> (e.g., <span class="postag">NN</span> for nouns and <span class="postag">VB</span> for verbs). The <span class="inline_code">parsetree()</span> function takes a string and returns a tree of nested objects (<span class="inline_code">Text</span> → <span class="inline_code">Sentence</span> → <span class="inline_code">Chunk</span> → <span class="inline_code">Word</span>). The <span class="inline_code">split()</span> function takes the output of <span class="inline_code">parse()</span> and returns a <span class="inline_code">Text</span>. See the <span class="inline_code">pattern.en</span> documentation (<span class="link-maintenance" style="color: #78aaff;"><a style="color: #8caaff; outline-style: none !important; outline-width: initial !important; outline-color: initial !important;" href="pattern-en.html#tree">here</a></span>) how to manipulate <span class="inline_code">Text</span> objects. </p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import parse, split
|
||||
>>>
|
||||
>>> s = parse('Il gatto nero faceva le fusa.')
|
||||
>>> for sentence in split(s):
|
||||
>>> print sentence
|
||||
|
||||
Sentence('Il/DT/B-NP/O gatto/NN/I-NP/O nero/JJ/I-NP/O'
|
||||
'faceva/VB/B-VP/O'
|
||||
'le/DT/B-NP/O fusa/NN/I-NP/O ././O/O')
|
||||
</pre></div>
|
||||
<p>The parser is mined from Wiktionary. The accuracy is around 92%.</p>
|
||||
<h3>Sentiment analysis</h3>
|
||||
<p>There's no <span class="inline_code">sentiment()</span> function for Italian yet.</p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,105 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-nl</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-nl" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-nl</a></div>
|
||||
<h1>pattern.nl</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1418" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">The pattern.nl module contains a fast part-of-speech tagger for Dutch (identifies nouns, adjectives, verbs, etc. in a sentence), sentiment analysis, and tools for Dutch verb conjugation and noun singularization &amp; pluralization.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema_nl.gif" alt="" width="620" height="180" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<p>The functions in this module take the same parameters and return the same values as their counterparts in <a href="pattern-en.html">pattern.en</a>. Refer to the documentation there for more details. </p>
|
||||
<h3>Noun singularization &amp; pluralization</h3>
|
||||
<p>For Dutch nouns there is <span class="inline_code">singularize()</span> and <span class="inline_code">pluralize()</span>. The implementation is slightly less robust than the English version (accuracy 91% for singularization and 80% for pluralization).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.nl import singularize, pluralize
|
||||
>>>
|
||||
>>> print singularize('katten')
|
||||
>>> print pluralize('kat')
|
||||
|
||||
kat
|
||||
katten </pre></div>
|
||||
<h3>Verb conjugation</h3>
|
||||
<p>For Dutch verbs there is <span class="inline_code">conjugate()</span>, <span class="inline_code">lemma()</span>, <span class="inline_code">lexeme()</span> and <span class="inline_code">tenses()</span>. The lexicon for verb conjugation contains about 4,000 common Dutch verbs. For unknown verbs it will fall back to a rule-based approach with an accuracy of about 81%.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.nl import conjugate
|
||||
>>> from pattern.nl import INFINITIVE, PRESENT, SG
|
||||
>>>
|
||||
>>> print conjugate('ben', INFINITIVE)
|
||||
>>> print conjugate('ben', PRESENT, 2, SG)
|
||||
|
||||
zijn
|
||||
bent </pre></div>
|
||||
<h3>Attributive &amp; predicative adjectives</h3>
|
||||
<p>Dutch adjectives followed by a noun inflect with an <span class="inline_code">-e</span> suffix (e.g., <em>braaf</em> → <em>brave kat</em>). You can get the base form with the <span class="inline_code">predicative()</span> function, or vice versa with <span class="inline_code">attributive()</span>. Accuracy is 99%.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.nl import attributive, predicative
|
||||
>>>
|
||||
>>> print predicative('brave')
|
||||
>>> print attributive('braaf')
|
||||
|
||||
braaf
|
||||
brave </pre></div>
|
||||
<h3 class="example">Sentiment analysis</h3>
|
||||
<p class="example">For opinion mining there is <span class="inline_code">sentiment()</span>, which returns a (<span class="inline_code">polarity</span>, <span class="inline_code">subjectivity</span>)-tuple, based on a lexicon of adjectives. Polarity is a value between <span class="inline_code">-1.0</span> and <span class="inline_code">+1.0</span>, subjectivity between <span class="inline_code">0.0</span> and <span class="inline_code">1.0</span>. The accuracy is around 82% (P 0.79, R 0.86) for book reviews:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.nl import sentiment
|
||||
>>> print sentiment('Een onwijs spannend goed boek!')
|
||||
|
||||
(0.69, 0.90) </pre></div>
|
||||
<h3>Parser</h3>
|
||||
<p>For parsing there is <span class="inline_code">parse()</span>, <span class="inline_code">parsetree()</span> and <span class="inline_code">split()</span>. The <span class="inline_code">parse()</span> function annotates words in the given string with their part-of-speech <a class="link-maintenance" href="mbsp-tags.html">tags</a> (e.g., <span class="postag">NN</span> for nouns and <span class="postag">VB</span> for verbs). The <span class="inline_code">parsetree()</span> function takes a string and returns a tree of nested objects (<span class="inline_code">Text</span> → <span class="inline_code">Sentence</span> → <span class="inline_code">Chunk</span> → <span class="inline_code">Word</span>). The <span class="inline_code">split()</span> function takes the output of <span class="inline_code">parse()</span> and returns a <span class="inline_code">Text</span>. See the <span class="inline_code">pattern.en</span> documentation (<a class="link-maintenance" href="pattern-en.html#tree">here</a>) for how to manipulate <span class="inline_code">Text</span> objects. </p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.nl import parse, split
|
||||
>>>
|
||||
>>> s = parse('De kat zit op de mat.')
|
||||
>>> for sentence in split(s):
|
||||
>>> print sentence
|
||||
|
||||
Sentence('De/DT/B-NP/O kat/NN/I-NP/O zit/VBZ/B-VP/O op/IN/B-PP/B-PNP'
|
||||
'de/DT/B-NP/I-PNP mat/NN/I-NP/I-PNP ././O/O')</pre></div>
|
||||
<p>The parser is built on Jeroen Geertzen's <a href="http://cosmion.net/jeroen/software/brill_pos/" target="_blank">Dutch language model</a>. The accuracy is around 91%. The original <a href="http://lands.let.ru.nl/literature/hvh.1999.2.ps" target="_blank">WOTAN</a> tagset is mapped to <a href="mbsp-tags.html">Penn Treebank</a>. If you need to work with the original tags you can also use <span class="inline_code">parse()</span> with an optional parameter <span class="inline_code">tagset="WOTAN"</span>.</p>
|
||||
<p class="small"><span style="text-decoration: underline;">Reference</span>: Geertzen, J. (2010). <em>Brill-NL. </em>Retrieved from: <a class="noexternal" style="color: inherit;" href="http://cosmion.net/jeroen/software/brill_pos/" target="_blank">http://cosmion.net/jeroen/software/brill_pos/</a>.</p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,424 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-search</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-search" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-search</a></div>
|
||||
<h1>pattern.search</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1357" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p class="big">The pattern.search module has a pattern matching system similar to regular expressions, that can be used to search a string by syntax (word function) or by semantics (word meaning).<span class="blue"> </span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | search <span class="blue"> </span>| <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema.gif" alt="" width="620" height="180" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<ul>
|
||||
<li><a href="#introduction">Searching + matching in a nutshell</a></li>
|
||||
<li><a href="#pattern">Pattern</a></li>
|
||||
<li><a href="#constraint">Constraint</a></li>
|
||||
<li><a href="#match">Match</a></li>
|
||||
<li><a href="#taxonomy">Taxonomy</a></li>
|
||||
<li><a href="#utility">Utility functions</a></li>
|
||||
</ul>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="introduction"></a>Searching + matching in a nutshell</h2>
|
||||
<p>The <span class="inline_code">search()</span> function takes a string (e.g., a word or a sequence of words) and returns a list of non-overlapping matches in the given sentence. The <span class="inline_code">match()</span> function returns the first match, or <span class="inline_code">None</span>. Both functions call <span class="inline_code">compile()</span>, which takes a string and returns a <span class="inline_code">Pattern</span> object.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">search(pattern, sentence)</pre><pre class="brush:python; gutter:false; light:true;">match(pattern, sentence)</pre><pre class="brush:python; gutter:false; light:true;">compile(pattern)</pre><div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import search
|
||||
>>> print search('rabbit', 'big white rabbit')
|
||||
|
||||
[Match(words=[Word('rabbit')])]</pre></div>
|
||||
<p>Search strings can contain a wildcard character at the <span class="inline_code">*start</span>, at the <span class="inline_code">end*</span>, at <span class="inline_code">*both*</span> ends or <span class="inline_code">in*between</span>:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> print search('rabbit*', 'big white rabbit')
|
||||
>>> print search('rabbit*', 'big white rabbits')
|
||||
|
||||
[Match(words=[Word('rabbit')])]
|
||||
[Match(words=[Word('rabbits')])]
|
||||
</pre></div>
|
||||
<p>Search strings can contain multiple options separated by a vertical bar (<span class="inline_code">|</span>):</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> print search('rabbit|cony|bunny', 'big black bunny')
|
||||
|
||||
[Match(words=[Word('bunny')])]</pre></div>
|
||||
<h3>Syntactical pattern matching</h3>
|
||||
<p>The examples above can also be resolved with (faster) regular expressions. The pattern.search module is more useful with <em>parsed</em> sentences. The pattern.en module has a <a class="link-maintenance" href="pattern-en.html#parser">parser</a> that takes a string and assigns a part-of-speech tag to each word (e.g., <span class="postag">NN</span> = noun, <span class="postag">VB</span> = verb, <span class="postag">JJ</span> = adjective). The parser also groups words into chunks (e.g., <span class="postag">JJ</span> + <span class="postag">NN</span> = <span class="postag">NP</span> = noun phrase) and finds word lemmata (was → be).</p>
|
||||
<p>A parsed <span class="inline_code">Sentence</span> or <span class="inline_code">Text</span> can be searched by part-of-speech tags:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import search
|
||||
>>> from pattern.en import parsetree
|
||||
>>>
|
||||
>>> t = parsetree('big white rabbit')
|
||||
>>> print t
|
||||
>>> print
|
||||
>>> print search('JJ', t) # all adjectives
|
||||
>>> print search('NN', t) # all nouns
|
||||
>>> print search('NP', t) # all noun phrases
|
||||
|
||||
[Sentence('big/JJ/B-NP/O white/JJ/I-NP/O rabbit/NN/I-NP/O')]
|
||||
|
||||
[Match(words=[Word(u'big/JJ')]), Match(words=[Word(u'white/JJ')])]
|
||||
[Match(words=[Word(u'rabbit/NN')])]
|
||||
[Match(words=[Word(u'big/JJ'), Word(u'white/JJ'), Word(u'rabbit/NN')])]</pre></div>
|
||||
<h3>Semantical pattern matching</h3>
|
||||
<p>A <span class="inline_code">Taxonomy</span> can be used to define semantical categories of words. Say we want to extract flower names from a text. The search pattern is rather clumsy: <span class="inline_code">"rose|lily|daisy|daffodil|begonia"</span>. A more robust approach is to work with a taxonomy:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import search, taxonomy
|
||||
>>> from pattern.en import parsetree
|
||||
>>>
|
||||
>>> for f in ('rose', 'lily', 'daisy', 'daffodil', 'begonia'):
|
||||
>>> taxonomy.append(f, type='flower')
|
||||
>>>
|
||||
>>> t = parsetree('A field of white daffodils.', lemmata=True)
|
||||
>>> print t
|
||||
>>> print
|
||||
>>> print search('FLOWER', t)
|
||||
|
||||
[Sentence('A/DT/B-NP/O/a field/NN/I-NP/O/field of/IN/B-PP/B-PNP/of'
|
||||
'white/JJ/B-NP/I-PNP/white daffodils/NNS/I-NP/I-PNP/daffodil ././O/O/.')]
|
||||
|
||||
[Match(words=[Word(u'white/JJ'), Word(u'daffodils/NNS')])]
|
||||
</pre></div>
|
||||
<p>Note how the search pattern has <span class="inline_code">"FLOWER"</span> in uppercase. Since <span class="inline_code">search()</span> is case-insensitive, uppercase words are recognized as taxonomy terms (i.e., <span class="postag">FLOWER</span> = rose + lily + daisy + daffodil + begonia). Furthermore, since lemmata were parsed, <em>daffodils</em> is recognized as the plural form of <em>daffodil</em> (the lemma), and as such also part of <span class="postag">FLOWER</span>.</p>
|
||||
<p>Note that the returned match is <em>white daffodils</em>. Since <span class="inline_code">search()</span> is (by default) greedy, the whole <span class="postag">NP</span> chunk is matched. In other words, <em>white daffodils</em> is regarded as a more specific instance of <em>daffodil</em>.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="pattern"></a>Pattern</h2>
|
||||
<p>A <span class="inline_code">Pattern</span> is a sequence of constraints that matches certain phrases in a (parsed) sentence. Each constraint can match a word in the sentence. If a number of successive words corresponds to the entire sequence of constraints, the phrase is a match. The search is case-insensitive.</p>
|
||||
<p>Constraints can be constructed for syntax (e.g., find all adjectives) and semantics (e.g., find all product names). For example, <span class="inline_code">Pattern.fromstring("NP</span> <span class="inline_code">be</span> <span class="inline_code">*</span> <span class="inline_code">than</span> <span class="inline_code">NP")</span> matches phrases such as <em><span style="text-decoration: underline;">the cat</span> was faster than <span style="text-decoration: underline;">the mouse</span></em>, and <em><span style="text-decoration: underline;">Chuck Norris</span> is cooler than <span style="text-decoration: underline;">Dolph Lundgren</span></em>, since <span class="postag">NP</span> matches any noun phrase.<em> </em>With <span class="inline_code">TAXONOMY</span>, the global <span class="inline_code">taxonomy</span> is used to categorize words.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">pattern = Pattern(sequence=[])</pre><pre class="brush:python; gutter:false; light:true;">pattern = Pattern.fromstring(string, taxonomy=TAXONOMY)</pre><pre class="brush:python; gutter:false; light:true;">pattern.sequence # List of Constraint objects.
|
||||
pattern.groups # List of groups, each a list of Constraint objects.
|
||||
pattern.strict # Disable greedy matching?
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">pattern.scan(string)
|
||||
pattern.search(sentence)
|
||||
pattern.match(sentence, start=0)</pre><ul>
|
||||
<li><span class="inline_code">Pattern.scan()</span> returns <span class="inline_code">True</span> if <span class="inline_code">Sentence(string)</span> <em>may</em> yield matches.<br />It can be faster to scan a tagged string, before casting it to a <span class="inline_code">Sentence</span> or <span class="inline_code">Text</span> and searching it. </li>
|
||||
<li><span class="inline_code">Pattern.search()</span> returns a list of <span class="inline_code">Match</span> objects from the given sentence.</li>
|
||||
<li><span class="inline_code">Pattern.match()</span> returns the first <span class="inline_code">Match</span> found in the given sentence, or <span class="inline_code">None</span>.</li>
|
||||
</ul>
|
||||
<div>For example:</div>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> from pattern.search import Pattern
|
||||
>>> from pattern.en import parsetree
|
||||
>>>
|
||||
>>> t = parsetree('Chuck Norris is cooler than Dolph Lundgren.', lemmata=True)
|
||||
>>> p = Pattern.fromstring('{NP} be * than {NP}')
|
||||
>>> m = p.match(t)
|
||||
>>> print m.group(1)
|
||||
>>> print m.group(2)
|
||||
|
||||
[Word(u'Chuck/NNP'), Word(u'Norris/NNP')]
|
||||
[Word(u'Dolph/NNP'), Word(u'Lundgren/NNP')]</pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="constraint"></a>Constraint</h2>
|
||||
<p>A <span class="inline_code">Constraint</span> matches a set of (tagged) words and taxonomy terms. For example:</p>
|
||||
<ul>
|
||||
<li><span class="inline_code">Constraint.fromstring('with|of')</span> matches either <em>with</em> or <em>of</em>.</li>
|
||||
<li><span class="inline_code">Constraint.fromstring('JJ?')</span> matches any adjective tagged <span class="postag">JJ</span>, but it is optional.</li>
|
||||
<li><span class="inline_code">Constraint.fromstring('NP|SBJ')</span> matches subject noun phrases.</li>
|
||||
<li><span class="inline_code">Constraint.fromstring('QUANTITY')</span> matches siblings of <span class="postag">QUANTITY</span> in the taxonomy.</li>
|
||||
</ul>
|
||||
<pre class="brush:python; gutter:false; light:true;">constraint = Constraint(
|
||||
words = [],
|
||||
tags = [],
|
||||
chunks = [],
|
||||
roles = [],
|
||||
taxa = [],
|
||||
optional = False,
|
||||
multiple = False,
|
||||
first = False,
|
||||
taxonomy = TAXONOMY,
|
||||
exclude = None,
|
||||
custom = None )</pre><pre class="brush:python; gutter:false; light:true;">constraint = Constraint.fromstring(string, **kwargs)</pre><pre class="brush:python; gutter:false; light:true;">constraint.index
|
||||
constraint.string
|
||||
constraint.words # List of allowed words/lemmata (of, with, ...)
|
||||
constraint.tags # List of allowed parts-of-speech (NN, JJ, ...)
|
||||
constraint.chunks # List of allowed chunk types (NP, VP, ...)
|
||||
constraint.roles # List of allowed chunk roles (SBJ, OBJ, ...)
|
||||
constraint.taxa # List of allowed taxonomy terms.
|
||||
constraint.taxonomy # Taxonomy used for lookup.
|
||||
constraint.optional # True => matches zero or one word.
|
||||
constraint.multiple # True => matches one or more words.
|
||||
constraint.first # True => can only match first word.
|
||||
constraint.exclude # None, or Constraint of disallowed options.
|
||||
constraint.custom # function(word) returns True if match. </pre><pre class="brush:python; gutter:false; light:true;">constraint.match(word)</pre><h3>Constraint string syntax</h3>
|
||||
<p><span class="inline_code">Constraint.fromstring()</span> returns a new <span class="inline_code">Constraint</span> from the given string. It takes the same optional parameters as the constructor. Uppercase words in the given string indicate a <a class="link-maintenance" href="mbsp-tags.html">part-of-speech tag</a> (e.g., <span class="postag">NN</span>, <span class="postag">JJ</span>, <span class="postag">VP</span>) or a taxonomy term (e.g. <span class="postag">PRODUCT</span>, <span class="postag">PERSON</span>).</p>
|
||||
<p>Some characters like <span class="inline_code">|</span> or <span class="inline_code">?</span> are special. They affect how the constraint is interpreted:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="smallcaps">Character</span></td>
|
||||
<td><span class="smallcaps">Example</span></td>
|
||||
<td><span class="smallcaps">Description</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">(</span></td>
|
||||
<td><span class="inline_code">(JJ)</span></td>
|
||||
<td>Wrapper for an optional constraint (deprecated).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">[</span></td>
|
||||
<td><span class="inline_code">[Mac OS X | Windows Vista]</span></td>
|
||||
<td>Wrapper for a constraint that has spaces.<span class="inline_code"> </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">{</span></td>
|
||||
<td><span class="inline_code">DT {JJ?} NN</span></td>
|
||||
<td>Wrapper for match groups, e.g., <span class="inline_code">Match.group(1)</span>.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">_</span></td>
|
||||
<td><span class="inline_code">Windows_Vista</span></td>
|
||||
<td>Converted to a space.<span class="inline_code"> </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">|</span></td>
|
||||
<td><span class="inline_code">ADJP|ADVP</span></td>
|
||||
<td>Separator for different options.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">*</span></td>
|
||||
<td><span class="inline_code">JJ*</span></td>
|
||||
<td>Used as a wildcard character. <span class="inline_code"> </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">!</span></td>
|
||||
<td><span class="inline_code">!be|VB*</span></td>
|
||||
<td>Used in front of words/tags that are <span style="text-decoration: underline;">not</span> allowed.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">?</span></td>
|
||||
<td><span class="inline_code">JJ?</span></td>
|
||||
<td>Used as a suffix, constraint is optional.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">+</span></td>
|
||||
<td><span class="inline_code">RB|JJ+</span> or <span class="inline_code">JJ?+</span> or <span class="inline_code">*+</span></td>
|
||||
<td>Used as a suffix, constraint can span multiple words.<span class="inline_code"> </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">^</span></td>
|
||||
<td><span class="inline_code">^hello</span></td>
|
||||
<td>Used as a prefix, constraint can only match first word.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>The characters listed in the table must be escaped if used as content (e.g., <span class="inline_code">"\?"</span>). You can use the module's <span class="inline_code">escape()</span> function. For example, <span class="inline_code">escape("hello?")</span> returns <span class="inline_code">"hello\?"</span>.</p>
|
||||
<h3>Constraint matching</h3>
|
||||
<p><span class="inline_code">Constraint.match()</span> returns <span class="inline_code">True</span> if the given string or <span class="inline_code">Word</span> is part of the constraint:</p>
|
||||
<ul>
|
||||
<li>the word (or its lemma) occurs in <span class="inline_code">Constraint.words</span>, OR,</li>
|
||||
<li>the word (or its lemma) occurs in <span class="inline_code">Constraint.taxa</span> taxonomy tree, AND</li>
|
||||
<li>the word tags and/or chunk tags match those defined in the constraint.</li>
|
||||
</ul>
|
||||
<p>It is case-insensitive. Individual terms in <span class="inline_code">Constraint.words</span> can contain wildcards (<span class="inline_code">*</span>). Some part-of-speech tags can also contain wildcards: <span class="postag">NN*</span>, <span class="postag">VB*</span>, <span class="postag">JJ*</span>, <span class="postag">RB*</span>, <span class="postag">PR*</span>, <span class="postag">WP*</span>.</p>
|
||||
<p>The following example demonstrates the use of optional and multiple constraints:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import search
|
||||
>>> from pattern.en import parsetree
|
||||
>>>
|
||||
>>> t = parsetree('tasty cat food')
|
||||
>>> print t
|
||||
>>> print
|
||||
>>> print search('DT? RB? JJ? NN+', t)
|
||||
|
||||
[Sentence('tasty/JJ/B-NP/O cat/NN/I-NP/O food/NN/I-NP/O')]
|
||||
|
||||
[Match(words=[Word(u'tasty/JJ'), Word(u'cat/NN'), Word(u'food/NN')])]</pre></div>
|
||||
<p>The pattern matches successive nouns (<span class="postag">NN</span>), optionally preceded by a determiner (<span class="postag">DT</span>), adverb (<span class="postag">RB</span>) and/or adjective (<span class="postag">JJ</span>). It matches anything from <em>food</em> to <em>cat food</em>, <em>tasty cat food</em>, <em>the tasty cat food</em>, etc.</p>
|
||||
<h3>Constraint = greedy</h3>
|
||||
<p>The pattern.en parser groups words that belong together into chunks. For example, <em>the black cat</em> is one chunk, tagged <span class="postag">NP</span> (i.e., a noun phrase). The head of the chunk is <em>cat</em>. By default, when a constraint matches the chunk head, it will greedily match the entire chunk. This means that if we search for <em>cat</em> and the sentence has <em>a big black cat</em>, the entire chunk will be returned.</p>
|
||||
<p>This behavior can be disabled by passing a <span class="inline_code">STRICT</span> flag to <span class="inline_code">Pattern</span>, <span class="inline_code">compile()</span>, <span class="inline_code">search()</span> or <span class="inline_code">match()</span>:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import search, STRICT
|
||||
>>> from pattern.en import parsetree
|
||||
>>>
|
||||
>>> t = parsetree('The black cat is lurking in the tree.')
|
||||
>>> print search('cat', t)
|
||||
|
||||
[Match(words=[Word(u'The/DT'), Word(u'black/JJ'), Word(u'cat/NN')])]
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> print search('cat', t, STRICT)
|
||||
|
||||
[Match(words=[Word(u'cat/NN')])]
|
||||
</pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="match"></a>Match</h2>
|
||||
<p><span class="inline_code">Pattern.search()</span> returns a list of <span class="inline_code">Match</span> objects, where each match is a list of successive <span class="inline_code">Word</span> objects.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">match = Match(pattern, words=[])</pre><pre class="brush:python; gutter:false; light:true;">match.pattern # Pattern source.
|
||||
match.words # List of Word objects.
|
||||
match.string # String of words separated with a space.
|
||||
match.start # Index of first word in sentence.
|
||||
match.stop # Index of last word in sentence + 1.</pre><pre class="brush:python; gutter:false; light:true;">match.group(index, chunked=False)
|
||||
match.constraint(word)
|
||||
match.constraints(chunk)
|
||||
match.constituents(constraint=None)</pre><ul>
|
||||
<li><span class="inline_code">Match.group()</span> returns a list of <span class="inline_code">Word</span> objects matching the constraints in a <span class="inline_code">{</span> <span class="inline_code">}</span> group.</li>
|
||||
<li><span class="inline_code">Match.constraint()</span> returns the <span class="inline_code">Constraint</span> that matched the given <span class="inline_code">Word</span>, or <span class="inline_code">None</span>.</li>
|
||||
<li><span class="inline_code">Match.constraints()</span> returns the list of constraints that matched the given <span class="inline_code">Chunk</span>.</li>
|
||||
<li><span class="inline_code">Match.constituents()</span> returns a list of <span class="inline_code">Word</span> and <span class="inline_code">Chunk</span> objects, with successive words grouped into chunks whenever possible. Optionally, returns only chunks/words that matched the given <span class="inline_code">Constraint</span> (or list of constraints). Chunks are only available if a <span class="inline_code">Sentence</span> or <span class="inline_code">Text</span> was given (i.e., not for plain strings).</li>
|
||||
</ul>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import match
|
||||
>>> from pattern.en import parsetree
|
||||
>>>
|
||||
>>> t = parsetree('The turtle was faster than the hare.', lemmata=True)
|
||||
>>> m = match('NP be ADJP|ADVP than NP', t)
|
||||
>>>
|
||||
>>> for w in m.words:
|
||||
>>> print w, '\t =>', m.constraint(w)
|
||||
|
||||
Word(u'The/DT') => Constraint(chunks=['NP'])
|
||||
Word(u'turtle/NN') => Constraint(chunks=['NP'])
|
||||
Word(u'was/VBD') => Constraint(words=['be'])
|
||||
Word(u'faster/RBR') => Constraint(chunks=['ADJP', 'ADVP'])
|
||||
Word(u'than/IN') => Constraint(words=['than'])
|
||||
Word(u'the/DT') => Constraint(chunks=['NP'])
|
||||
Word(u'hare/NN') => Constraint(chunks=['NP'])
|
||||
</pre></div>
|
||||
<h3>Match groups</h3>
|
||||
<p>Match groups in the search pattern can be used to quickly retrieve what you need from a <span class="inline_code">Match</span>:</p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> t = parsetree('the big black dog')
|
||||
>>> m = match('DT {JJ?+ NN}', t)
|
||||
>>> print m.group(0) # full pattern
|
||||
>>> print m.group(1) # {JJ?+ NN}
|
||||
>>> print m.group(1).string
|
||||
|
||||
[Word(u'the/DT'), Word(u'big/JJ'), Word(u'black/JJ'), Word(u'dog/NN')]
|
||||
[Word(u'big/JJ'), Word(u'black/JJ'), Word(u'dog/NN')]
|
||||
'big black dog'</pre></div>
|
||||
<h3>Match words</h3>
|
||||
<p>Each <span class="inline_code">Word</span> in a <span class="inline_code">Match</span> or <span class="inline_code">Match.group()</span> has the following attributes:</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">word = Word(sentence, string, tag=None, index=0)</pre><pre class="brush:python; gutter:false; light:true;">word.string
|
||||
word.tag # Part-of-speech tag (e.g. NN, JJ).
|
||||
word.sentence # Sentence (a list of successive Words).
|
||||
word.index # Sentence index.
|
||||
</pre><p>When <span class="inline_code">search()</span> or <span class="inline_code">match()</span> is given a string, <span class="inline_code">Word</span> objects are created when the <span class="inline_code">Match</span> is returned. When given a parsed <span class="inline_code">Sentence</span>, <span class="inline_code">Word</span> objects are linked from the sentence. These have extra attributes. For an overview of <span class="inline_code">Sentence</span>, <span class="inline_code">Chunk</span> and <span class="inline_code">Word</span>, see the <a class="link-maintenance" href="pattern-en.html#tree">parse tree</a> documentation.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="taxonomy"></a>Taxonomy</h2>
|
||||
<p>A taxonomy is a hierarchical tree of words classified by semantic type. For example: a <em>begonia</em> is a <em>flower</em>, and a <em>flower</em> is a <em>plant</em>. Taxonomy terms can be used as constraints. For example, <span class="inline_code">"FLOWER"</span> will match <em>flower</em> as well as <em>begonia</em>, or any other flower that has been defined in the taxonomy. By default, constraints will retrieve terms from the global <span class="inline_code">taxonomy</span>.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">taxonomy = Taxonomy()</pre><pre class="brush:python; gutter:false; light:true;">taxonomy.case_sensitive # False by default.
|
||||
taxonomy.classifiers # List of Classifier objects.</pre><pre class="brush:python; gutter:false; light:true;">taxonomy.append(term, type=None)
|
||||
taxonomy.remove(term)</pre><pre class="brush:python; gutter:false; light:true;">taxonomy.classify(term)
|
||||
taxonomy.parents(term, recursive=False)
|
||||
taxonomy.children(term, recursive=False)
|
||||
</pre><ul>
|
||||
<li><span class="inline_code">Taxonomy.classify()</span> returns the (most recent) semantic type for a given term.<br />If the term is not in the taxonomy, it will try <span class="inline_code">Taxonomy.classifiers</span> (see further).</li>
|
||||
<li><span class="inline_code">Taxonomy.parents()</span> returns a list of all semantic types for the given term.</li>
|
||||
<li><span class="inline_code">Taxonomy.children()</span> returns a list of all terms for the given semantic type.<br />With <span class="inline_code">recursive=True</span>, traverses the entire branch.</li>
|
||||
</ul>
|
||||
<p>For example:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import taxonomy, search
|
||||
>>>
|
||||
>>> taxonomy.append('chicken', type='food')
|
||||
>>> taxonomy.append('chicken', type='bird')
|
||||
>>> taxonomy.append('penguin', type='bird')
|
||||
>>> taxonomy.append('bird', type='animal')
|
||||
>>>
|
||||
>>> print taxonomy.parents('chicken')
|
||||
>>> print taxonomy.children('animal', recursive=True)
|
||||
>>> print
|
||||
>>> print search('FOOD', "I'm eating chicken.")
|
||||
|
||||
['bird', 'food']
|
||||
['bird', 'penguin', 'chicken']
|
||||
|
||||
[Match(words=[Word('chicken')])]</pre></div>
|
||||
<h3>Taxonomy classifier</h3>
|
||||
<p>A <span class="inline_code">Classifier</span> offers a rule-based approach to enrich the taxonomy. If a term is not in the taxonomy, it will iterate its list of classifiers. Each classifier is a set of functions that can be customized to yield the semantic type of a term.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">classifier = Classifier(
|
||||
parents = lambda term: [],
|
||||
children = lambda term: [])</pre><pre class="brush:python; gutter:false; light:true;">classifier.parents(term) # Returns a list of parents for a term.
|
||||
classifier.children(term) # Returns a list of children for a term.
|
||||
</pre><p>This is useful because taxonomy terms can't include wildcards (i.e., the <span class="inline_code">*</span> character is taken literally).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import taxonomy, search
|
||||
>>> from pattern.search import Classifier
|
||||
>>>
|
||||
>>> def parents(term):
|
||||
>>>     return ['quality'] if term.endswith('ness') else []
|
||||
>>>
|
||||
>>> taxonomy.classifiers.append(Classifier(parents))
|
||||
>>> taxonomy.append('cat', type='animal')
|
||||
>>>
|
||||
>>> print search('QUALITY of a|an|the ANIMAL', 'the litheness of a cat')
|
||||
|
||||
[Match(words=[Word('litheness'), Word('of'), Word('a'), Word('cat')])]</pre></div>
|
||||
<p>This example creates a classifier that tags words ending in <em>-ness</em> as <span class="postag">quality</span> (e.g., sharpness, greediness). This is more concise than manually adding all words ending in <em>-ness</em> to the taxonomy. The <span class="postag">quality</span> term is then used as a constraint. Remember to always define <span class="inline_code">Classifier.parents()</span>. For performance, <span class="inline_code">Classifier.children()</span> is not called in <span class="inline_code">Constraint.match()</span>.</p>
|
||||
<h3 class="example">Taxonomy classifier from WordNet</h3>
|
||||
<p class="example">The following example creates a rule-based taxonomy from the <span class="inline_code">pattern.en.wordnet</span> module:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.search import taxonomy, WordNetClassifier
|
||||
>>>
|
||||
>>> taxonomy.classifiers.append(WordNetClassifier())
|
||||
>>>
|
||||
>>> print taxonomy.parents('cat', pos='NN')
|
||||
>>> print taxonomy.parents('cat', pos='VB')
|
||||
|
||||
['feline']
|
||||
['flog']</pre></div>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="text-align: center;">
|
||||
<p><br /><img src="../g/pattern-search-taxonomy.jpg" alt="" width="300" height="163" /></p>
|
||||
<p><span style="display: inline !important;"><br /><span class="smallcaps">wordnet taxonomy example</span></span></p>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="utility"></a>Utility functions</h2>
|
||||
<p>The pattern.search module has a number of useful list functions:</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">unique(iterable) # Returns a new list with unique items.</pre><pre class="brush:python; gutter:false; light:true;">find(function, iterable) # Returns first item for which function(item) is True.</pre><pre class="brush:python; gutter:false; light:true;">product(iterable, repeat=1) # Returns a generator of all combinations of length repeat.</pre><pre class="brush:python; gutter:false; light:true;">variations(iterable, optional=lambda item: False)</pre><pre class="brush:python; gutter:false; light:true;">odict(items=[])</pre><ul>
|
||||
<li><span class="inline_code">product()</span> returns a generator of all permutations, with replacement. <br />For example: <span class="inline_code">product([1,2,3],</span> <span class="inline_code">repeat=2)</span> yields:<br /><span class="inline_code">[1,1],</span> <span class="inline_code">[1,2],</span> <span class="inline_code">[1,3],</span> <span class="inline_code">[2,1],</span> <span class="inline_code">[2,2],</span> <span class="inline_code">[2,3],</span> <span class="inline_code">[3,1],</span> <span class="inline_code">[3,2],</span> <span class="inline_code">[3,3]</span></li>
|
||||
<li><span class="inline_code">variations()</span> returns all variations of a sequence with optional items (in-order).</li>
|
||||
<li><span class="inline_code">odict()</span> is a dictionary with ordered keys (e.g., like a stack).<br />The most recent keys will be returned first when traversing the dictionary.<br /><span class="inline_code">odict.push()</span> takes a <span class="inline_code">(key,</span> <span class="inline_code">value)</span>-tuple and sets the given key to the given value. If the key exists, it pushes the updated item to the top of the stack.</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,115 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-shell</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-shell" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-shell</a></div>
|
||||
<h1>pattern.shell</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1400" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<h2>pattern.en parser</h2>
|
||||
<p>The English parser can be invoked from the command-line. The <a href="pattern.html">pattern</a> module should be installed (i.e., located in <span class="inline_code">/site-packages</span>, see installation instructions) or the current working directory should be the one that contains the <span class="inline_code">pattern</span> folder.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">> python -m pattern.en -f file.txt</pre><p><span>If no options are given a full parse is executed (i.e. tokenization, tagging, chunking, relations and lemmata). Otherwise, you need to explicitly list every required option:</span></p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="inline_code">-O</span></td>
|
||||
<td><span class="inline_code">--tokenize</span></td>
|
||||
<td>Tokenize the input.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-T </span></td>
|
||||
<td><span class="inline_code">--tags </span></td>
|
||||
<td>Parse part-of-speech tags.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-C</span> </td>
|
||||
<td><span class="inline_code">--chunks </span></td>
|
||||
<td>Parse chunks and <span class="postag">PNP</span> tags. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-R</span> </td>
|
||||
<td><span class="inline_code">--relations</span> </td>
|
||||
<td>Parse verb/predicate relations. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-L</span> </td>
|
||||
<td><span class="inline_code">--lemmata </span></td>
|
||||
<td>Parse lemmata (<em>was</em> → <em>be</em>). </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-f </span></td>
|
||||
<td><span class="inline_code">--file</span> </td>
|
||||
<td>Input file path. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-s </span></td>
|
||||
<td><span class="inline_code">--string </span></td>
|
||||
<td>Input string. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-e</span> </td>
|
||||
<td><span class="inline_code">--encoding</span> </td>
|
||||
<td>Specify character encoding (utf-8 by default). </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">-v </span></td>
|
||||
<td class="inline_code">--version</td>
|
||||
<td>Print current version of Pattern.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Short options can be concatenated. Also note the <span class="inline_code">xml</span> option which produces XML output:</p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">> python -m pattern.en xml -OT -s 'The black cat sat on the mat.'</pre></div>
|
||||
<h3><span>pattern.es | de | fr | it | nl parsers</span></h3>
|
||||
<p><span>The parsers for other languages work in the same way. Note the <span class="inline_code">xml</span> option (produces XML output).</span></p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">> python -m pattern.es -s 'El gato negro se sienta en la estera.'</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">> python -m pattern.de -s 'Die schwarze Katze liegt auf der Matte.'</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">> python -m pattern.fr -s "Le chat noir s'était assis sur le tapis."</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">> python -m pattern.it -s 'Il gatto nero faceva le fusa.'</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">> python -m pattern.nl -s 'De zwarte kat zat op de mat.'</pre></div>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1,54 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>stop-words</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/stop-words" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/stop-words</a></div>
|
||||
<h1>Stop words</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1378" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p>Stop words are words that are so common that they are often filtered out prior to, or after, processing of natural language data (text). For example, the <a href="pattern-vector.html">pattern.vector</a> module (by default) ignores stop words when constructing bag-of-words.</p>
|
||||
<p>There is no definitive list of stop words. The following set is based on <a href="http://snowball.tartarus.org/algorithms/english/stop.txt" target="_blank">Martin Porter's list</a> and expanded with words that occur frequently in other lists:</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<p> </p>
|
||||
<p><em>a, aboard, about, above, across, after, again, against, all, almost, alone, along, alongside, already, also, although, always, am, amid, amidst, among, amongst, an, and, another, anti, any, anybody, anyone, anything, anywhere, are, area, areas, aren't, around, as, ask, asked, asking, asks, astride, at, aught, away, back, backed, backing, backs, bar, barring, be, became, because, become, becomes, been, before, began, behind, being, beings, below, beneath, beside, besides, best, better, between, beyond, big, both, but, by, came, can, can't, cannot, case, cases, certain, certainly, circa, clear, clearly, come, concerning, considering, could, couldn't, daren't, despite, did, didn't, differ, different, differently, do, does, doesn't, doing, don't, done, down, down, downed, downing, downs, during, each, early, either, end, ended, ending, ends, enough, even, evenly, ever, every, everybody, everyone, everything, everywhere, except, excepting, excluding, face, faces, fact, facts, far, felt, few, fewer, find, finds, first, five, following, for, four, from, full, fully, further, furthered, furthering, furthers, gave, general, generally, get, gets, give, given, gives, go, goes, going, good, goods, got, great, greater, greatest, group, grouped, grouping, groups, had, hadn't, has, hasn't, have, haven't, having, he, he'd, he'll, he's, her, here, here's, hers, herself, high, high, high, higher, highest, him, himself, his, hisself, how, how's, however, i, i'd, i'll, i'm, i've, idem, if, ilk, important, in, including, inside, interest, interested, interesting, interests, into, is, isn't, it, it's, its, itself, just, keep, keeps, kind, knew, know, known, knows, large, largely, last, later, latest, least, less, let, let's, lets, like, likely, long, longer, longest, made, make, making, man, many, may, me, member, members, men, might, mightn't, mine, minus, more, most, mostly, mr, mrs, much, must, mustn't, my, myself, naught, near, necessary, need, needed, needing, needn't, 
needs, neither, never, new, new, newer, newest, next, no, nobody, non, none, noone, nor, not, nothing, notwithstanding, now, nowhere, number, numbers, of, off, often, old, older, oldest, on, once, one, oneself, only, onto, open, opened, opening, opens, opposite, or, order, ordered, ordering, orders, other, others, otherwise, ought, oughtn't, our, ours, ourself, ourselves, out, outside, over, own, part, parted, parting, parts, past, pending, per, perhaps, place, places, plus, point, pointed, pointing, points, possible, present, presented, presenting, presents, problem, problems, put, puts, quite, rather, really, regarding, right, right, room, rooms, round, said, same, save, saw, say, says, second, seconds, see, seem, seemed, seeming, seems, seen, sees, self, several, shall, shan't, she, she'd, she'll, she's, should, shouldn't, show, showed, showing, shows, side, sides, since, small, smaller, smallest, so, some, somebody, someone, something, somewhat, somewhere, state, states, still, still, such, suchlike, sundry, sure, take, taken, than, that, that's, the, thee, their, theirs, them, themselves, then, there, there's, therefore, these, they, they'd, they'll, they're, they've, thine, thing, things, think, thinks, this, those, thou, though, thought, thoughts, three, through, throughout, thus, thyself, till, to, today, together, too, took, tother, toward, towards, turn, turned, turning, turns, twain, two, under, underneath, unless, unlike, until, up, upon, us, use, used, uses, various, versus, very, via, vis-a-vis, want, wanted, wanting, wants, was, wasn't, way, ways, we, we'd, we'll, we're, we've, well, wells, went, were, weren't, what, what's, whatall, whatever, whatsoever, when, when's, where, where's, whereas, wherewith, wherewithal, whether, which, whichever, whichsoever, while, who, who's, whoever, whole, whom, whomever, whomso, whomsoever, whose, whosoever, why, why's, will, with, within, without, won't, work, worked, working, works, worth, would, wouldn't, ye, 
year, years, yet, yon, yonder, you, you'd, you'll, you're, you've, you-all, young, younger, youngest, your, yours, yourself, yourselves</em></p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -1 +0,0 @@
|
||||
<meta http-equiv="refresh" content="0; url=html/pattern.html" />
|
@ -1,65 +0,0 @@
|
||||
/**
|
||||
* SyntaxHighlighter
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter
|
||||
*
|
||||
* SyntaxHighlighter is donationware. If you are using it, please donate.
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter/donate.html
|
||||
*
|
||||
* @version
|
||||
* 3.0.83 (July 02 2010)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2004-2010 Alex Gorbatchev.
|
||||
*
|
||||
* @license
|
||||
* Dual licensed under the MIT and GPL licenses.
|
||||
*/
|
||||
;(function()
{
    // CommonJS: when a module loader is present, take SyntaxHighlighter from shCore.
    typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null;

    /**
     * JavaScript brush.
     *
     * Populates this.regexList with the highlighting rules for JavaScript
     * (strings, comments, '#' lines, function declarations and three keyword
     * groups) and registers the script-tag delimiters for html-script mode.
     */
    function Brush()
    {
        // Every segment except the last ends with a space so that the words on
        // either side of a '+' stay separate after concatenation.
        var keywords1 = 'break case catch continue ' +
                        'default delete do else ' +
                        'for function if in instanceof ' +
                        'new return switch ' +
                        'throw try typeof var while with'
                        ;

        var keywords2 = 'false true null super this';

        // The trailing spaces after 'home' and 'onmove' were missing, which
        // fused the neighbouring words into 'homename' and 'onmoveonresize'.
        // NOTE(review): getKeywords() is defined in shCore; presumably it
        // splits this list on whitespace, so the four affected words could
        // never be highlighted individually.
        var keywords3 = 'alert back blur close confirm focus forward home ' +
                        'name navigate onblur onerror onfocus onload onmove ' +
                        'onresize onunload open print prompt scroll status stop';

        var r = SyntaxHighlighter.regexLib;

        this.regexList = [
            { regex: r.multiLineDoubleQuotedString, css: 'string' },      // double quoted strings
            { regex: r.multiLineSingleQuotedString, css: 'string' },      // single quoted strings
            { regex: r.singleLineCComments,         css: 'comments1' },   // one line comments
            { regex: r.multiLineCComments,          css: 'comments2' },   // multiline comments
            { regex: /\s*#.*/gm,                    css: 'preprocessor' },// preprocessor tags like #region and #endregion
            // "function <name>(": emit one match for the keyword and one for
            // the declared name.
            { regex: /function ([^\()]+)\(/g, func: function(match, r) {
                return [
                    new SyntaxHighlighter.Match("function ", match.index, "keyword1"),
                    // 9 == "function ".length, i.e. the offset of the name.
                    new SyntaxHighlighter.Match(match[1], match.index+9, "name")
                ]; } },
            { regex: new RegExp(this.getKeywords(keywords1), 'gm'), css: 'keyword1' }, // keywords 1
            { regex: new RegExp(this.getKeywords(keywords2), 'gm'), css: 'keyword2' }, // keywords 2
            { regex: new RegExp(this.getKeywords(keywords3), 'gm'), css: 'keyword3' }  // keywords 3
        ];

        this.forHtmlScript(r.scriptScriptTags);
    };

    Brush.prototype = new SyntaxHighlighter.Highlighter();
    Brush.aliases   = ['js', 'jscript', 'javascript'];

    SyntaxHighlighter.brushes.JScript = Brush;

    // CommonJS
    typeof(exports) != 'undefined' ? exports.Brush = Brush : null;
})();
|
@ -1,73 +0,0 @@
|
||||
/**
|
||||
* SyntaxHighlighter
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter
|
||||
*
|
||||
* SyntaxHighlighter is donationware. If you are using it, please donate.
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter/donate.html
|
||||
*
|
||||
* @version
|
||||
* 3.0.83 (July 02 2010)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2004-2010 Alex Gorbatchev.
|
||||
*
|
||||
* @license
|
||||
* Dual licensed under the MIT and GPL licenses.
|
||||
*/
|
||||
;(function()
{
    // CommonJS: when a module loader is present, take SyntaxHighlighter from shCore.
    typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null;

    /**
     * Python brush.
     *
     * Builds this.regexList (comments, decorators, triple-quoted blocks,
     * strings, numbers, def/class declarations, built-in functions, keywords
     * and special names) and registers the ASP-style script tags for
     * html-script mode.
     */
    function Brush()
    {
        // Contributed by Gheorghe Milas and Ahmad Sherif

        var lib = SyntaxHighlighter.regexLib;

        var keywords =  'and assert break class continue def del elif else ' +
                        'except exec finally for from global if import in is ' +
                        'lambda not or pass print raise return try yield while';

        var funcs = '__import__ abs all any apply basestring bin bool buffer callable ' +
                    'chr classmethod cmp coerce compile complex delattr dict dir ' +
                    'divmod enumerate eval execfile file filter float format frozenset ' +
                    'getattr globals hasattr hash help hex id input int intern ' +
                    'isinstance issubclass iter len list locals long map max min next ' +
                    'object oct open ord pow property range raw_input reduce ' +
                    'reload repr reversed round set setattr slice sorted staticmethod ' +
                    'str sum super tuple type type unichr unicode vars xrange zip';

        var special = 'None True False self cls class_';

        // "def <name>(": one match for the keyword, one for the declared name.
        function markDef(match, r)
        {
            var keywordPart = new SyntaxHighlighter.Match("def ", match.index, "keyword");
            // 4 == "def ".length, i.e. the offset of the name.
            var namePart = new SyntaxHighlighter.Match(match[1], match.index+4, "name");
            return [keywordPart, namePart];
        }

        // "class <Name>(" or "class <Name>:": same split as markDef.
        function markClass(match, r)
        {
            var keywordPart = new SyntaxHighlighter.Match("class ", match.index, "keyword");
            // 6 == "class ".length, i.e. the offset of the name.
            var namePart = new SyntaxHighlighter.Match(match[1], match.index+6, "name");
            return [keywordPart, namePart];
        }

        // Rules are appended in the same order as before.
        var rules = [];
        rules.push({ regex: lib.singleLinePerlComments, css: 'comments1' });
        rules.push({ regex: /^\s*@\w+/gm, css: 'decorator' });
        rules.push({ regex: /(['\"]{3})([^\1])*?\1/gm, css: 'comments2' });
        rules.push({ regex: /"(?!")(?:\.|\\\"|[^\""\n])*"/gm, css: 'string' });
        rules.push({ regex: /'(?!')(?:\.|(\\\')|[^\''\n])*'/gm, css: 'string' });
        rules.push({ regex: /\b\d+\.?\w*/g, css: 'value' });
        rules.push({ regex: /def ([^\()]+)\(/g, func: markDef });
        rules.push({ regex: /class ([0-9a-zA-Z]+)(\(|:)/g, func: markClass });
        rules.push({ regex: new RegExp(this.getKeywords(funcs), 'gmi'), css: 'functions' });
        rules.push({ regex: new RegExp(this.getKeywords(keywords), 'gm'), css: 'keyword' });
        rules.push({ regex: new RegExp(this.getKeywords(special), 'gm'), css: 'color1' });

        this.regexList = rules;

        this.forHtmlScript(lib.aspScriptTags);
    };

    Brush.prototype = new SyntaxHighlighter.Highlighter();
    Brush.aliases   = ['py', 'python'];

    SyntaxHighlighter.brushes.Python = Brush;

    // CommonJS
    typeof(exports) != 'undefined' ? exports.Brush = Brush : null;
})();
|
@ -1,69 +0,0 @@
|
||||
/**
|
||||
* SyntaxHighlighter
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter
|
||||
*
|
||||
* SyntaxHighlighter is donationware. If you are using it, please donate.
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter/donate.html
|
||||
*
|
||||
* @version
|
||||
* 3.0.83 (July 02 2010)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2004-2010 Alex Gorbatchev.
|
||||
*
|
||||
* @license
|
||||
* Dual licensed under the MIT and GPL licenses.
|
||||
*/
|
||||
;(function()
{
    // CommonJS: when a module loader is present, take SyntaxHighlighter from shCore.
    typeof(require) != 'undefined' ? SyntaxHighlighter = require('shCore').SyntaxHighlighter : null;

    /**
     * XML / XHTML / XSLT / HTML brush.
     *
     * Registers three rules: CDATA-style "<![ ... [ ... ]]>" sections, XML
     * comments, and tags (handled by process(), which splits a tag into its
     * name, attribute names and attribute values).
     *
     * The tag delimiters are matched both literally ("<", ">") and in their
     * entity-escaped form ("&lt;", "&gt;"). The escaped alternatives had
     * been reduced to the redundant "(<|<)" / "(>|>)"; they are restored here.
     * This only widens what the patterns accept.
     */
    function Brush()
    {
        /**
         * Break one matched tag into highlighter matches.
         *
         * match     - XRegExp match for a whole tag; match.attributes is the
         *             named group holding the text after the tag name.
         * regexInfo - the rule entry this callback belongs to (unused).
         *
         * Returns an array of SyntaxHighlighter.Match objects: attribute
         * names ('color1'), attribute values ('string') and finally the tag
         * name ('keyword').
         */
        function process(match, regexInfo)
        {
            var constructor = SyntaxHighlighter.Match,
                code = match[0],
                tag = new XRegExp('(&lt;|<)[\\s\\/\\?]*(?<name>[:\\w-\\.]+)', 'xg').exec(code),
                result = []
                ;

            if (match.attributes != null)
            {
                var attributes,
                    regex = new XRegExp('(?<name> [\\w:\\-\\.]+)' +
                                        '\\s*=\\s*' +
                                        '(?<value> ".*?"|\'.*?\'|\\w+)',
                                        'xg');

                while ((attributes = regex.exec(code)) != null)
                {
                    // Offsets are relative to the start of the tag, so shift
                    // them by match.index to position them in the full source.
                    result.push(new constructor(attributes.name, match.index + attributes.index, 'color1'));
                    result.push(new constructor(attributes.value, match.index + attributes.index + attributes[0].indexOf(attributes.value), 'string'));
                }
            }

            if (tag != null)
                result.push(
                    new constructor(tag.name, match.index + tag[0].indexOf(tag.name), 'keyword')
                );

            return result;
        }

        this.regexList = [
            { regex: new XRegExp('(\\&lt;|<)\\!\\[[\\w\\s]*?\\[(.|\\s)*?\\]\\](\\&gt;|>)', 'gm'),           css: 'color2' },   // <![ ... [ ... ]]>
            { regex: SyntaxHighlighter.regexLib.xmlComments,                                                css: 'comments' }, // <!-- ... -->
            { regex: new XRegExp('(&lt;|<)[\\s\\/\\?]*(\\w+)(?<attributes>.*?)[\\s\\/\\?]*(&gt;|>)', 'sg'), func: process }
        ];
    };

    Brush.prototype = new SyntaxHighlighter.Highlighter();
    Brush.aliases   = ['xml', 'xhtml', 'xslt', 'html'];

    SyntaxHighlighter.brushes.Xml = Brush;

    // CommonJS
    typeof(exports) != 'undefined' ? exports.Brush = Brush : null;
})();
|
@ -1,226 +0,0 @@
|
||||
/**
|
||||
* SyntaxHighlighter
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter
|
||||
*
|
||||
* SyntaxHighlighter is donationware. If you are using it, please donate.
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter/donate.html
|
||||
*
|
||||
* @version
|
||||
* 3.0.83 (July 02 2010)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2004-2010 Alex Gorbatchev.
|
||||
*
|
||||
* @license
|
||||
* Dual licensed under the MIT and GPL licenses.
|
||||
*/
|
||||
/* Reset block: forces a known baseline on every listed element inside
   .syntaxhighlighter. Every declaration is !important. */
.syntaxhighlighter a,
.syntaxhighlighter div,
.syntaxhighlighter code,
.syntaxhighlighter table,
.syntaxhighlighter table td,
.syntaxhighlighter table tr,
.syntaxhighlighter table tbody,
.syntaxhighlighter table thead,
.syntaxhighlighter table caption,
.syntaxhighlighter textarea {
  -moz-border-radius: 0 0 0 0 !important;
  -webkit-border-radius: 0 0 0 0 !important;
  /* Standard property, placed last so it wins where the prefixed forms are also understood. */
  border-radius: 0 0 0 0 !important;
  background: none !important;
  border: 0 !important;
  bottom: auto !important;
  float: none !important;
  height: auto !important;
  left: auto !important;
  line-height: 1.1em !important;
  margin: 0 !important;
  outline: 0 !important;
  overflow: visible !important;
  padding: 0 !important;
  position: static !important;
  right: auto !important;
  text-align: left !important;
  top: auto !important;
  vertical-align: baseline !important;
  width: auto !important;
  box-sizing: content-box !important;
  font-family: "Consolas", "Bitstream Vera Sans Mono", "Courier New", Courier, monospace !important;
  font-weight: normal !important;
  font-style: normal !important;
  font-size: 1em !important;
  /* Two min-height values kept in the original order; the later one applies wherever it is accepted. */
  min-height: inherit !important;
  min-height: auto !important;
}
|
||||
|
||||
.syntaxhighlighter {
|
||||
width: 100% !important;
|
||||
margin: 1em 0 1em 0 !important;
|
||||
position: relative !important;
|
||||
overflow: auto !important;
|
||||
font-size: 1em !important;
|
||||
}
|
||||
.syntaxhighlighter.source {
|
||||
overflow: hidden !important;
|
||||
}
|
||||
.syntaxhighlighter .bold {
|
||||
font-weight: bold !important;
|
||||
}
|
||||
.syntaxhighlighter .italic {
|
||||
font-style: italic !important;
|
||||
}
|
||||
.syntaxhighlighter .line {
|
||||
white-space: pre !important;
|
||||
}
|
||||
.syntaxhighlighter table {
|
||||
width: 100% !important;
|
||||
}
|
||||
.syntaxhighlighter table caption {
|
||||
text-align: left !important;
|
||||
padding: .5em 0 0.5em 1em !important;
|
||||
}
|
||||
.syntaxhighlighter table td.code {
|
||||
width: 100% !important;
|
||||
}
|
||||
.syntaxhighlighter table td.code .container {
|
||||
position: relative !important;
|
||||
}
|
||||
.syntaxhighlighter table td.code .container textarea {
|
||||
box-sizing: border-box !important;
|
||||
position: absolute !important;
|
||||
left: 0 !important;
|
||||
top: 0 !important;
|
||||
width: 100% !important;
|
||||
height: 100% !important;
|
||||
border: none !important;
|
||||
background: white !important;
|
||||
padding-left: 1em !important;
|
||||
overflow: hidden !important;
|
||||
white-space: pre !important;
|
||||
}
|
||||
.syntaxhighlighter table td.gutter .line {
|
||||
text-align: right !important;
|
||||
padding: 0 0.5em 0 1em !important;
|
||||
}
|
||||
.syntaxhighlighter table td.code .line {
|
||||
padding: 0 1em !important;
|
||||
}
|
||||
.syntaxhighlighter.nogutter td.code .container textarea, .syntaxhighlighter.nogutter td.code .line {
|
||||
padding-left: 0em !important;
|
||||
}
|
||||
.syntaxhighlighter.show {
|
||||
display: block !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed table {
|
||||
display: none !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar {
|
||||
padding: 0.1em 0.8em 0em 0.8em !important;
|
||||
font-size: 1em !important;
|
||||
position: static !important;
|
||||
width: auto !important;
|
||||
height: auto !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar span {
|
||||
display: inline !important;
|
||||
margin-right: 1em !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar span a {
|
||||
padding: 0 !important;
|
||||
display: none !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar span a.expandSource {
|
||||
display: inline !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar {
|
||||
position: absolute !important;
|
||||
right: 1px !important;
|
||||
top: 1px !important;
|
||||
width: 11px !important;
|
||||
height: 11px !important;
|
||||
font-size: 10px !important;
|
||||
z-index: 10 !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar span.title {
|
||||
display: inline !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar a {
|
||||
display: block !important;
|
||||
text-align: center !important;
|
||||
text-decoration: none !important;
|
||||
padding-top: 1px !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar a.expandSource {
|
||||
display: none !important;
|
||||
}
|
||||
.syntaxhighlighter.ie {
|
||||
font-size: .9em !important;
|
||||
padding: 1px 0 1px 0 !important;
|
||||
}
|
||||
.syntaxhighlighter.ie .toolbar {
|
||||
line-height: 8px !important;
|
||||
}
|
||||
.syntaxhighlighter.ie .toolbar a {
|
||||
padding-top: 0px !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .line.alt1 .content,
|
||||
.syntaxhighlighter.printing .line.alt2 .content,
|
||||
.syntaxhighlighter.printing .line.highlighted .number,
|
||||
.syntaxhighlighter.printing .line.highlighted.alt1 .content,
|
||||
.syntaxhighlighter.printing .line.highlighted.alt2 .content {
|
||||
background: none !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .line .number {
|
||||
color: #bbbbbb !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .line .content {
|
||||
color: black !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .toolbar {
|
||||
display: none !important;
|
||||
}
|
||||
.syntaxhighlighter.printing a {
|
||||
text-decoration: none !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .plain, .syntaxhighlighter.printing .plain a {
|
||||
color: black !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .comments, .syntaxhighlighter.printing .comments a {
|
||||
color: #008200 !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .string, .syntaxhighlighter.printing .string a {
|
||||
color: blue !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .keyword {
|
||||
color: #006699 !important;
|
||||
font-weight: bold !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .preprocessor {
|
||||
color: gray !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .variable {
|
||||
color: #aa7700 !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .value {
|
||||
color: #009900 !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .functions {
|
||||
color: #ff1493 !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .constants {
|
||||
color: #0066cc !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .script {
|
||||
font-weight: bold !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .color1, .syntaxhighlighter.printing .color1 a {
|
||||
color: gray !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .color2, .syntaxhighlighter.printing .color2 a {
|
||||
color: #ff1493 !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .color3, .syntaxhighlighter.printing .color3 a {
|
||||
color: red !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .break, .syntaxhighlighter.printing .break a {
|
||||
color: black !important;
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
/**
|
||||
* SyntaxHighlighter
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter
|
||||
*
|
||||
* SyntaxHighlighter is donationware. If you are using it, please donate.
|
||||
* http://alexgorbatchev.com/SyntaxHighlighter/donate.html
|
||||
*
|
||||
* @version
|
||||
* 3.0.83 (July 02 2010)
|
||||
*
|
||||
* @copyright
|
||||
* Copyright (C) 2004-2010 Alex Gorbatchev.
|
||||
*
|
||||
* @license
|
||||
* Dual licensed under the MIT and GPL licenses.
|
||||
*/
|
||||
.syntaxhighlighter {
|
||||
background-color: white !important;
|
||||
}
|
||||
.syntaxhighlighter .line.alt1 {
|
||||
background-color: white !important;
|
||||
}
|
||||
.syntaxhighlighter .line.alt2 {
|
||||
background-color: white !important;
|
||||
}
|
||||
.syntaxhighlighter .line.highlighted.alt1, .syntaxhighlighter .line.highlighted.alt2 {
|
||||
background-color: #e0e0e0 !important;
|
||||
}
|
||||
.syntaxhighlighter .line.highlighted.number {
|
||||
color: black !important;
|
||||
}
|
||||
.syntaxhighlighter table caption {
|
||||
color: black !important;
|
||||
}
|
||||
.syntaxhighlighter .gutter {
|
||||
color: #afafaf !important;
|
||||
}
|
||||
.syntaxhighlighter .gutter .line {
|
||||
border-right: 3px solid #6ce26c !important;
|
||||
}
|
||||
.syntaxhighlighter .gutter .line.highlighted {
|
||||
background-color: #6ce26c !important;
|
||||
color: white !important;
|
||||
}
|
||||
.syntaxhighlighter.printing .line .content {
|
||||
border: none !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed {
|
||||
overflow: visible !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar {
|
||||
color: blue !important;
|
||||
background: white !important;
|
||||
border: 1px solid #6ce26c !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar a {
|
||||
color: blue !important;
|
||||
}
|
||||
.syntaxhighlighter.collapsed .toolbar a:hover {
|
||||
color: red !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar {
|
||||
color: white !important;
|
||||
background: #6ce26c !important;
|
||||
border: none !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar a {
|
||||
color: white !important;
|
||||
}
|
||||
.syntaxhighlighter .toolbar a:hover {
|
||||
color: black !important;
|
||||
}
|
||||
.syntaxhighlighter .plain, .syntaxhighlighter .plain a {
|
||||
color: black !important;
|
||||
}
|
||||
.syntaxhighlighter .comments, .syntaxhighlighter .comments a {
|
||||
color: #008200 !important;
|
||||
}
|
||||
.syntaxhighlighter .string, .syntaxhighlighter .string a {
|
||||
color: blue !important;
|
||||
}
|
||||
.syntaxhighlighter .keyword {
|
||||
color: #006699 !important;
|
||||
}
|
||||
.syntaxhighlighter .preprocessor {
|
||||
color: gray !important;
|
||||
}
|
||||
.syntaxhighlighter .variable {
|
||||
color: #aa7700 !important;
|
||||
}
|
||||
.syntaxhighlighter .value {
|
||||
color: #009900 !important;
|
||||
}
|
||||
.syntaxhighlighter .functions {
|
||||
color: #ff1493 !important;
|
||||
}
|
||||
.syntaxhighlighter .constants {
|
||||
color: #0066cc !important;
|
||||
}
|
||||
/* Elements marked with the "script" class: bold, keyword-coloured text with no
   background fill. */
.syntaxhighlighter .script {
  font-weight: bold !important;
  color: #006699 !important;
  /* "none" is not a valid background-color value; "transparent" is the valid way to say no fill. */
  background-color: transparent !important;
}
|
||||
.syntaxhighlighter .color1, .syntaxhighlighter .color1 a {
|
||||
color: gray !important;
|
||||
}
|
||||
.syntaxhighlighter .color2, .syntaxhighlighter .color2 a {
|
||||
color: #ff1493 !important;
|
||||
}
|
||||
.syntaxhighlighter .color3, .syntaxhighlighter .color3 a {
|
||||
color: red !important;
|
||||
}
|
||||
|
||||
.syntaxhighlighter .keyword {
|
||||
font-weight: bold !important;
|
||||
}
|