third updates
Before Width: | Height: | Size: 137 KiB |
Before Width: | Height: | Size: 431 KiB |
Before Width: | Height: | Size: 290 KiB |
Before Width: | Height: | Size: 271 KiB |
Before Width: | Height: | Size: 1.5 MiB |
Before Width: | Height: | Size: 502 KiB |
Before Width: | Height: | Size: 341 KiB |
@ -1,57 +0,0 @@
|
||||
import collections

# This script was adapted from:
# https://towardsdatascience.com/very-simple-python-script-for-extracting-most-common-words-from-a-story-1e3570d0b9d0
# https://git.xpub.nl/rita/categorization_of_files/src/branch/master/categorization.py
#
# It asks for a text file, counts the words that are not stopwords, prints
# the most frequent ones, and suggests the "Library Studies" category when
# one of those frequent words appears in library_studies.txt.

# Characters removed from every token, so "book" and "book!" count as the
# same word. A single translation table replaces the chain of str.replace
# calls. The closing curly quote ” is stripped together with the opening “,
# otherwise “terms” would be counted as terms”. The apostrophe ’ is left
# alone on purpose so contractions such as don’t stay intact.
PUNCTUATION = str.maketrans("", "", '.,:"!“”‘*')

# How many of the most common words to print and to test against the category.
# n_print = int(input("How many most common words to print: "))
n_print = 5

# Open and read the file; the context manager closes it even if reading fails.
path = input("\nwhich platform's Terms of Service do you want to look at: \n")
with open(path, encoding="utf8") as source_file:
    text = source_file.read()

# Stopwords are common words we don't want to count, like "a", "an", "the".
# The list contains non-ASCII entries (an em dash), so decode it explicitly
# instead of relying on the platform default encoding.
with open('stopwords.txt', encoding="utf8") as stopwords_file:
    stopwords = set(line.strip() for line in stopwords_file)

# Splitting words from punctuation and counting them.
word_counter = collections.Counter()
for token in text.lower().split():
    word = token.translate(PUNCTUATION)
    # A token made only of punctuation becomes "" and is not a word.
    if word and word not in stopwords:
        word_counter[word] += 1

# Print the n_print most common words.
print("\nMost used colonial words are:")
top_words = word_counter.most_common(n_print)
for word, count in top_words:
    print(word, "—", count)

# Categories

# Words that are inside the category Library Studies.
with open('library_studies.txt', encoding="utf8") as category_file:
    library_studies = set(line.strip() for line in category_file)

# NOTE(review): the original indentation was lost; the trailing `else` is
# read here as belonging to the `for` loop (suggest once if any top word
# matches, otherwise print the fallback once) - confirm against the repo.
if any(word in library_studies for word, _ in top_words):
    print("\nWe suggest the following categorization for this file:\nLibrary Studies\n")
else:
    print("\nWe don't have any suggestion of categorization for this file.\n")
|
@ -1,82 +0,0 @@
|
||||
import collections

# This script was adapted from:
# https://towardsdatascience.com/very-simple-python-script-for-extracting-most-common-words-from-a-story-1e3570d0b9d0
# https://git.xpub.nl/rita/categorization_of_files/src/branch/master/categorization.py
#
# It asks for a platform's Terms of Service text file, counts the words that
# are not stopwords, prints the most frequent ones, and checks them against
# the word list in library_studies.txt.
#
# TODO: highlight the matched words in the terminal, e.g. with
# termcolor.colored(word, 'white', 'on_red').

# Characters removed from every token, so "book" and "book!" count as the
# same word. A single translation table replaces the chain of str.replace
# calls. The closing curly quote ” is stripped together with the opening “,
# otherwise “terms” would be counted as terms”. The apostrophe ’ is left
# alone on purpose so contractions such as don’t stay intact.
PUNCTUATION = str.maketrans("", "", '.,:"!“”‘*()')

# How many of the most common words to print and to test against the category.
n_print = 100

# Open and read the file; the context manager closes it even if reading fails.
path = input("\nwhich platform's Terms of Service do you want to look at: \n")
with open(path, encoding="utf8") as source_file:
    text = source_file.read()

# Stopwords are common words we don't want to count, like "a", "an", "the".
# The list contains non-ASCII entries (an em dash), so decode it explicitly
# instead of relying on the platform default encoding.
with open('stopwords.txt', encoding="utf8") as stopwords_file:
    stopwords = set(line.strip() for line in stopwords_file)

# Splitting words from punctuation and counting them.
word_counter = collections.Counter()
for token in text.lower().split():
    word = token.translate(PUNCTUATION)
    # A token made only of punctuation becomes "" and is not a word.
    if word and word not in stopwords:
        word_counter[word] += 1

# Print the n_print most common words.
print("\nMost used colonial words are:")
top_words = word_counter.most_common(n_print)
for word, count in top_words:
    print(word, "—", count)

# Categories

# Words that are inside the category Library Studies.
with open('library_studies.txt', encoding="utf8") as category_file:
    library_studies = set(line.strip() for line in category_file)

# NOTE(review): the original indentation was lost; the trailing `else` is
# read here as belonging to the `for` loop (suggest once if any top word
# matches, otherwise print the fallback once) - confirm against the repo.
if any(word in library_studies for word, _ in top_words):
    print("\nWe suggest the following categorization for this platform:\nLibrary Studies\n")
else:
    print("\nThese are the TikTok's colonial words.\n")
|
Before Width: | Height: | Size: 234 KiB |
@ -1,19 +0,0 @@
|
||||
archives
|
||||
author
|
||||
bibliographic
|
||||
bibliotheca
|
||||
book
|
||||
bookcase
|
||||
books
|
||||
bookshelf
|
||||
bookstore
|
||||
catalogue
|
||||
e-book
|
||||
librarian
|
||||
librarianship
|
||||
library
|
||||
literature
|
||||
manuscripts
|
||||
papyrus
|
||||
read
|
||||
reading
|
@ -1,67 +0,0 @@
|
||||
-
|
||||
a
|
||||
about
|
||||
all
|
||||
an
|
||||
and
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
but
|
||||
by
|
||||
can
|
||||
do
|
||||
for
|
||||
from
|
||||
get
|
||||
had
|
||||
has
|
||||
have
|
||||
he
|
||||
I
|
||||
i
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
its
|
||||
me
|
||||
more
|
||||
my
|
||||
not
|
||||
of
|
||||
on
|
||||
one
|
||||
or
|
||||
other
|
||||
out
|
||||
so
|
||||
some
|
||||
such
|
||||
than
|
||||
that
|
||||
the
|
||||
their
|
||||
them
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
those
|
||||
to
|
||||
up
|
||||
was
|
||||
were
|
||||
what
|
||||
when
|
||||
which
|
||||
who
|
||||
whom
|
||||
will
|
||||
with
|
||||
would
|
||||
|
|
||||
—
|
Before Width: | Height: | Size: 282 KiB |
@ -0,0 +1,75 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.pyc
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
.coveralls.yml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
*.dev*
|
||||
*.nja
|
||||
|
||||
build
|
||||
dist
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Flymake
|
||||
*_flymake.py
|
||||
|
||||
# Pattern specific ignore pattern
|
||||
pattern/web/cache/tmp/
|
||||
web/cache/tmp/
|
||||
pattern_unittest_db
|
||||
test/pattern_unittest_db
|
||||
|
||||
.DS_Store
|
@ -0,0 +1,249 @@
|
||||
[MASTER]
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Profiled execution.
|
||||
profile=no
|
||||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=CVS, feed, json, pdf, soup, pywordnet, svm
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# List of plugins (as comma separated values of python modules names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifiers separated by comma (,) or put this option
|
||||
# multiple times.
|
||||
#enable=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this option
|
||||
# multiple times (only on the command line, not in the configuration file where
|
||||
# it should appear only once).
|
||||
disable=C0103,W0142,E1103
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, msvs
|
||||
# (visual studio) and html
|
||||
output-format=text
|
||||
|
||||
# Include message's id in output
|
||||
include-ids=yes
|
||||
|
||||
# Put messages in a separate file for each module / package specified on the
|
||||
# command line instead of printing them on stdout. Reports (if any) will be
|
||||
# written in a file name "pylint_global.[txt|html]".
|
||||
files-output=no
|
||||
|
||||
# Tells whether to display a full report or only the messages
|
||||
reports=yes
|
||||
|
||||
# Python expression which should return a note less than 10 (10 is the highest
|
||||
# note). You have access to the variables errors warning, statement which
|
||||
# respectively contain the number of errors / warnings messages and the total
|
||||
# number of statements analyzed. This is used by the global evaluation report
|
||||
# (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Add a comment according to your evaluation note. This is used by the global
|
||||
# evaluation report (RP0004).
|
||||
comment=no
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Required attributes for module, separated by a comma
|
||||
required-attributes=
|
||||
|
||||
# List of builtins function names that should not be used, separated by a comma
|
||||
bad-functions=map,filter,apply,input
|
||||
|
||||
# Regular expression which should only match correct module names
|
||||
module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
|
||||
|
||||
# Regular expression which should only match correct module level names
|
||||
const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
|
||||
|
||||
# Regular expression which should only match correct class names
|
||||
class-rgx=[A-Z_][a-zA-Z0-9]+$
|
||||
|
||||
# Regular expression which should only match correct function names
|
||||
function-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct method names
|
||||
method-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct instance attribute names
|
||||
attr-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct argument names
|
||||
argument-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct variable names
|
||||
variable-rgx=[a-z_][a-z0-9_]{2,30}$
|
||||
|
||||
# Regular expression which should only match correct list comprehension /
|
||||
# generator expression variable names
|
||||
inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma
|
||||
good-names=i,j,k,ex,Run,_
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma
|
||||
bad-names=foo,bar,baz,toto,tutu,tata
|
||||
|
||||
# Regular expression which should only match functions or classes name which do
|
||||
# not require a docstring
|
||||
no-docstring-rgx=__.*__
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=100
|
||||
|
||||
# Maximum number of lines in a module
|
||||
max-module-lines=1000
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take in consideration, separated by a comma.
|
||||
notes=FIXME,XXX,TODO
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# List of classes names for which member attributes should not be checked
|
||||
# (useful for classes with attributes dynamically set).
|
||||
ignored-classes=SQLObject
|
||||
|
||||
# When zope mode is activated, add a predefined set of Zope acquired attributes
|
||||
# to generated-members.
|
||||
zope=no
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E0201 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=REQUEST,acl_users,aq_parent
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# A regular expression matching the beginning of the name of dummy variables
|
||||
# (i.e. not used).
|
||||
dummy-variables-rgx=_|dummy
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid to define new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of interface methods to ignore, separated by a comma. This is used for
|
||||
# instance to not check methods defined in Zope's Interface base class.
|
||||
ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,__new__,setUp
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method
|
||||
max-args=5
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore
|
||||
ignored-argument-names=_.*
|
||||
|
||||
# Maximum number of locals for function / method body
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of return / yield for function / method body
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of branch for function / method body
|
||||
max-branchs=12
|
||||
|
||||
# Maximum number of statements in function / method body
|
||||
max-statements=50
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma
|
||||
deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled)
|
||||
import-graph=
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
ext-import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled)
|
||||
int-import-graph=
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when being caught. Defaults to
|
||||
# "Exception"
|
||||
overgeneral-exceptions=Exception
|
@ -0,0 +1,44 @@
|
||||
language: python
|
||||
|
||||
dist: precise
|
||||
|
||||
python:
|
||||
- "3.6"
|
||||
|
||||
before_install:
|
||||
- export TZ=Europe/Brussels
|
||||
- if [ ${TRAVIS_PYTHON_VERSION:0:1} == "2" ]; then wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; else wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; fi
|
||||
- bash miniconda.sh -b -p $HOME/miniconda
|
||||
- export PATH="$HOME/miniconda/bin:$PATH"
|
||||
- conda update --yes conda
|
||||
- conda install --yes numpy scipy
|
||||
- pip install --quiet pytest pytest-cov pytest-xdist chardet
|
||||
|
||||
install:
|
||||
- python setup.py install --quiet
|
||||
- pip freeze
|
||||
# Install and compile libsvm and liblinear
|
||||
- sudo apt-get install -y build-essential
|
||||
- git clone https://github.com/cjlin1/libsvm
|
||||
- cd libsvm; make lib; sudo cp libsvm.so.2 /lib; sudo ln -s /lib/libsvm.so.2 /lib/libsvm.so; cd ..
|
||||
- git clone https://github.com/cjlin1/liblinear
|
||||
- cd liblinear; make lib; sudo cp liblinear.so.3 /lib; sudo ln -s /lib/liblinear.so.3 /lib/liblinear.so; cd ..
|
||||
|
||||
script:
|
||||
- pytest --cov=pattern
|
||||
|
||||
|
||||
after_script:
|
||||
- pip install --quiet coveralls
|
||||
- coveralls
|
||||
|
||||
branches:
|
||||
only:
|
||||
- development
|
||||
|
||||
notifications:
|
||||
email: false
|
||||
|
||||
# You can connect to MySQL/MariaDB using the username "travis" or "root" and a blank password.
|
||||
services:
|
||||
- mysql
|
@ -0,0 +1,29 @@
|
||||
Copyright (c) 2011-2013 University of Antwerp, Belgium
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Pattern nor the names of its
|
||||
contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
After Width: | Height: | Size: 2.7 KiB |
After Width: | Height: | Size: 1.3 KiB |
After Width: | Height: | Size: 280 B |
After Width: | Height: | Size: 16 KiB |
After Width: | Height: | Size: 6.2 KiB |
After Width: | Height: | Size: 187 B |
After Width: | Height: | Size: 88 KiB |
After Width: | Height: | Size: 108 KiB |
After Width: | Height: | Size: 4.9 KiB |
After Width: | Height: | Size: 5.1 KiB |
After Width: | Height: | Size: 3.3 KiB |
After Width: | Height: | Size: 8.5 KiB |
After Width: | Height: | Size: 25 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 42 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 3.6 KiB |
After Width: | Height: | Size: 5.4 KiB |
After Width: | Height: | Size: 5.8 KiB |
After Width: | Height: | Size: 6.3 KiB |
After Width: | Height: | Size: 6.4 KiB |
After Width: | Height: | Size: 6.2 KiB |
After Width: | Height: | Size: 7.5 KiB |
After Width: | Height: | Size: 7.0 KiB |
After Width: | Height: | Size: 6.5 KiB |
After Width: | Height: | Size: 8.1 KiB |
After Width: | Height: | Size: 11 KiB |
After Width: | Height: | Size: 5.8 KiB |
After Width: | Height: | Size: 10 KiB |
After Width: | Height: | Size: 9.1 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 14 KiB |
After Width: | Height: | Size: 5.4 KiB |
After Width: | Height: | Size: 6.4 KiB |
After Width: | Height: | Size: 2.8 KiB |
After Width: | Height: | Size: 3.9 KiB |
After Width: | Height: | Size: 9.1 KiB |
After Width: | Height: | Size: 7.4 KiB |
After Width: | Height: | Size: 8.8 KiB |
After Width: | Height: | Size: 48 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 6.6 KiB |
After Width: | Height: | Size: 44 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 20 KiB |
After Width: | Height: | Size: 19 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 2.1 KiB |
After Width: | Height: | Size: 24 KiB |
After Width: | Height: | Size: 429 B |
@ -0,0 +1,474 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>mbsp-tags</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/mbsp-tags" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/mbsp-tags</a></div>
|
||||
<h1>Penn Treebank II tag set</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1274" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p class="big"><a href="pattern.html">Pattern</a> and <a href="http://www.clips.ua.ac.be/pages/MBSP" target="_self">MBSP</a> assign meaningful tags to words and groups of words in a sentence. Each tag is a short code (such as "<span class="postag">DT</span>" for "determiner").</p>
|
||||
<p>The tag set is based on the Penn Treebank Tagging Guidelines [<a href="ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz" target="_self">pdf</a>].</p>
|
||||
<h3>Part-of-speech tags</h3>
|
||||
<p>Part-of-speech tags are assigned to a single word according to its role in the sentence. Traditional grammar classifies words based on eight parts of speech: the verb (<span class="postag">VB</span>), the noun (<span class="postag">NN</span>), the pronoun (<span class="postag">PR</span>+<span class="postag">DT</span>), the adjective (<span class="postag">JJ</span>), the adverb (<span class="postag">RB</span>), the preposition (<span class="postag">IN</span>), the conjunction (<span class="postag">CC</span>), and the interjection (<span class="postag">UH</span>).</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td class="smallcaps">Example</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">CC </span></td>
|
||||
<td>conjunction, coordinating</td>
|
||||
<td><em>and, or, but</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">CD </span></td>
|
||||
<td>cardinal number</td>
|
||||
<td><em>five, three, 13%</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">DT </span></td>
|
||||
<td>determiner</td>
|
||||
<td><em>the, a, these <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">EX </span></td>
|
||||
<td>existential there</td>
|
||||
<td><em><span style="text-decoration: underline;">there</span> were six boys <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">FW </span></td>
|
||||
<td>foreign word</td>
|
||||
<td><em>mais <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">IN </span></td>
|
||||
<td>conjunction, subordinating or preposition</td>
|
||||
<td><em>of, on, before, unless <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">JJ </span></td>
|
||||
<td>adjective</td>
|
||||
<td><em>nice, easy </em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">JJR </span></td>
|
||||
<td>adjective, comparative</td>
|
||||
<td><em>nicer, easier</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">JJS </span></td>
|
||||
<td>adjective, superlative</td>
|
||||
<td><em>nicest, easiest <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">LS </span></td>
|
||||
<td>list item marker</td>
|
||||
<td><em> </em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">MD </span></td>
|
||||
<td>verb, modal auxiliary</td>
|
||||
<td><em>may, should <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NN </span></td>
|
||||
<td>noun, singular or mass</td>
|
||||
<td><em>tiger, chair, laughter <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NNS </span></td>
|
||||
<td>noun, plural</td>
|
||||
<td><em>tigers, chairs, insects <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NNP </span></td>
|
||||
<td>noun, proper singular</td>
|
||||
<td><em>Germany, God, Alice <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NNPS </span></td>
|
||||
<td>noun, proper plural</td>
|
||||
<td><em>we met two <span style="text-decoration: underline;">Christmases</span> ago <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PDT </span></td>
|
||||
<td>predeterminer</td>
|
||||
<td><em><span style="text-decoration: underline;">both</span> his children <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">POS</span></td>
|
||||
<td>possessive ending</td>
|
||||
<td><em>'s</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PRP </span></td>
|
||||
<td>pronoun, personal</td>
|
||||
<td><em>me, you, it <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PRP$ </span></td>
|
||||
<td>pronoun, possessive</td>
|
||||
<td><em>my, your, our <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RB </span></td>
|
||||
<td>adverb</td>
|
||||
<td><em>extremely, loudly, hard <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RBR </span></td>
|
||||
<td>adverb, comparative</td>
|
||||
<td><em>better <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RBS </span></td>
|
||||
<td>adverb, superlative</td>
|
||||
<td><em>best <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">RP </span></td>
|
||||
<td>adverb, particle</td>
|
||||
<td><em>about, off, up <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">SYM </span></td>
|
||||
<td>symbol</td>
|
||||
<td><em>% <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">TO </span></td>
|
||||
<td>infinitival to</td>
|
||||
<td><em>what <span style="text-decoration: underline;">to</span> do? <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">UH </span></td>
|
||||
<td>interjection</td>
|
||||
<td><em>oh, oops, gosh <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VB </span></td>
|
||||
<td>verb, base form</td>
|
||||
<td><em>think <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBZ </span></td>
|
||||
<td>verb, 3rd person singular present</td>
|
||||
<td><em>she <span style="text-decoration: underline;">thinks </span><br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBP </span></td>
|
||||
<td>verb, non-3rd person singular present</td>
|
||||
<td><em>I <span style="text-decoration: underline;">think </span><br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBD </span></td>
|
||||
<td>verb, past tense</td>
|
||||
<td><em>they <span style="text-decoration: underline;">thought </span><br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBN </span></td>
|
||||
<td>verb, past participle</td>
|
||||
<td><em>a <span style="text-decoration: underline;">sunken</span> ship <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VBG </span></td>
|
||||
<td>verb, gerund or present participle</td>
|
||||
<td><em><span style="text-decoration: underline;">thinking</span> is fun <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WDT </span></td>
|
||||
<td><em>wh</em>-determiner</td>
|
||||
<td><em>which, whatever, whichever <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WP </span></td>
|
||||
<td><em>wh</em>-pronoun, personal</td>
|
||||
<td><em>what, who, whom <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WP$</span></td>
|
||||
<td><em>wh</em>-pronoun, possessive</td>
|
||||
<td><em>whose, whosever <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">WRB</span></td>
|
||||
<td><em>wh</em>-adverb</td>
|
||||
<td><em>where, when <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">. </span></td>
|
||||
<td>punctuation mark, sentence closer</td>
|
||||
<td><em>.;?* <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">, </span></td>
|
||||
<td>punctuation mark, comma</td>
|
||||
<td><em>, <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">: </span></td>
|
||||
<td>punctuation mark, colon</td>
|
||||
<td><em>: <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">( </span></td>
|
||||
<td>contextual separator, left paren</td>
|
||||
<td><em>( <br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">) </span></td>
|
||||
<td>contextual separator, right paren</td>
|
||||
<td><em>) <br /></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Chunk tags</h3>
|
||||
<p>Chunk tags are assigned to groups of words that belong together (i.e. phrases). The most common phrases are the noun phrase (<span class="postag">NP</span>, for example <em>the black cat</em>) and the verb phrase (<span class="postag">VP</span>, for example <em>is purring</em>).</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td><span class="smallcaps">Words </span></td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
<td align="right">%</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">NP </span></td>
|
||||
<td>noun phrase<span class="postag"> </span></td>
|
||||
<td><span class="postag">DT</span>+<span class="postag">RB</span>+<span class="postag">JJ</span>+<span class="postag">NN</span> + <span class="postag">PR</span></td>
|
||||
<td><em>the strange bird</em></td>
|
||||
<td align="right"> 51</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PP </span></td>
|
||||
<td>prepositional phrase</td>
|
||||
<td><span class="postag">TO</span>+<span class="postag">IN </span></td>
|
||||
<td><em>in between</em></td>
|
||||
<td align="right"> 19</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">VP </span></td>
|
||||
<td>verb phrase </td>
|
||||
<td><span class="postag">RB</span>+<span class="postag">MD</span>+<span class="postag">VB </span></td>
|
||||
<td><em>was looking<br /></em></td>
|
||||
<td align="right">9</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">ADVP</span></td>
|
||||
<td>adverb phrase</td>
|
||||
<td><span class="postag">RB</span></td>
|
||||
<td><em>also<br /></em></td>
|
||||
<td align="right"> 6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">ADJP</span></td>
|
||||
<td>adjective phrase<span class="postag"> </span></td>
|
||||
<td><span class="postag">CC</span>+<span class="postag">RB</span>+<span class="postag">JJ</span></td>
|
||||
<td><em>warm and cosy</em></td>
|
||||
<td align="right"> 3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">SBAR</span></td>
|
||||
<td>subordinating conjunction </td>
|
||||
<td><span class="postag">IN</span></td>
|
||||
<td><em><span style="text-decoration: underline;">whether</span> or not<br /></em></td>
|
||||
<td align="right">3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PRT </span></td>
|
||||
<td>particle</td>
|
||||
<td><span class="postag">RP</span></td>
|
||||
<td><em><span style="text-decoration: underline;">up</span> the stairs</em></td>
|
||||
<td align="right"> 1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">INTJ</span></td>
|
||||
<td>interjection</td>
|
||||
<td><span class="postag">UH</span></td>
|
||||
<td><em>hello</em><em><br /></em></td>
|
||||
<td align="right"> 0</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>The IOB prefix marks whether a word is inside or outside of a chunk.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">I-</span></td>
|
||||
<td>inside the chunk</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">B-</span></td>
|
||||
<td>inside the chunk, preceding word is part of a different chunk</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">O </span></td>
|
||||
<td>not part of a chunk</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>A prepositional noun phrase (<span class="postag">PNP</span>) is a group of chunks starting with a preposition (<span class="postag">PP</span>) followed by noun phrases (<span class="postag">NP</span>), for example: <em>under the table</em>.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td class="smallcaps">Chunks</td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">PNP</span></td>
|
||||
<td>prepositional noun phrase</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span><span class="postag"> </span></td>
|
||||
<td><em>as of today</em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Relation tags</h3>
|
||||
<p>Relation tags describe the relation between different chunks, and clarify the role of a chunk in that relation. The most common roles in a sentence are <span class="postag">SBJ</span> (subject noun phrase) and <span class="postag">OBJ</span> (object noun phrase). They link <span class="postag">NP</span> to <span class="postag">VP</span> chunks. The subject of a sentence is the person, thing, place or idea that is <em>doing</em> or <em>being</em> something. The object of a sentence is the person/thing affected by the action.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td class="smallcaps">Chunks</td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
<td align="right"><span class="smallcaps">%</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-SBJ</span></td>
|
||||
<td>sentence subject</td>
|
||||
<td><span class="postag">NP</span><span class="postag"> </span></td>
|
||||
<td><em><span style="text-decoration: underline;">the cat</span> sat on the mat<br /></em></td>
|
||||
<td align="right">35</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-OBJ</span></td>
|
||||
<td>sentence object</td>
|
||||
<td><span class="postag">NP</span>+<span class="postag">SBAR</span></td>
|
||||
<td><em>the cat grabs <span style="text-decoration: underline;">the fish</span><br /></em></td>
|
||||
<td align="right">27</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-PRD </span></td>
|
||||
<td>predicate</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span>+<span class="postag">ADJP </span></td>
|
||||
<td><em>the cat feels <span style="text-decoration: underline;">warm and fuzzy</span><br /></em></td>
|
||||
<td align="right">7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-TMP</span></td>
|
||||
<td>temporal </td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span>+<span class="postag">ADVP</span></td>
|
||||
<td><em>arrive </em><em><span style="text-decoration: underline;">at noon</span> <br /></em></td>
|
||||
<td align="right">7</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-CLR </span></td>
|
||||
<td>closely related</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP</span>+<span class="postag">ADVP </span></td>
|
||||
<td><em>work </em><em><span style="text-decoration: underline;">as a researcher</span> <br /></em></td>
|
||||
<td align="right">6</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-LOC</span></td>
|
||||
<td>location </td>
|
||||
<td><span class="postag">PP </span></td>
|
||||
<td><em>live </em><em><span style="text-decoration: underline;">in Belgium</span> <br /></em></td>
|
||||
<td align="right">4</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-DIR </span></td>
|
||||
<td>direction</td>
|
||||
<td><span class="postag">PP </span></td>
|
||||
<td><em>walk</em><em> <span style="text-decoration: underline;">towards</span> the door<br /></em></td>
|
||||
<td align="right">3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-EXT</span></td>
|
||||
<td>extent</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">NP </span></td>
|
||||
<td><em>drop <span style="text-decoration: underline;">10 %</span><br /></em></td>
|
||||
<td align="right">1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">-PRP</span></td>
|
||||
<td>purpose</td>
|
||||
<td><span class="postag">PP</span>+<span class="postag">SBAR </span></td>
|
||||
<td><em>die <span style="text-decoration: underline;">as a result</span> of <br /></em></td>
|
||||
<td align="right">1</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Anchor tags</h3>
|
||||
<p>Anchor tags describe how prepositional noun phrases (<span class="postag">PNP</span>) are attached to other chunks in the sentence. For example, in the sentence, <em>I eat pizza with a fork</em>, the anchor of <em>with a fork</em> is <em>eat</em> because it answers the question: "In what way do I eat?"</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Tag </span></td>
|
||||
<td><span class="smallcaps">Description </span></td>
|
||||
<td><span class="smallcaps">Example </span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">A1</span></td>
|
||||
<td>anchor chunk that corresponds to <span class="postag">P1</span></td>
|
||||
<td><em><span style="text-decoration: underline;">eat</span> with a fork<br /></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="postag">P1 </span></td>
|
||||
<td><span class="postag">PNP</span> that corresponds to <span class="postag">A1 </span></td>
|
||||
<td><em>eat <span style="text-decoration: underline;">with a fork</span><br /></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p> </p>
|
||||
<p><strong>Occurrence estimate </strong><span class="small"><br /></span></p>
|
||||
<p><span class="small">The given percentages for chunk and relation tags are based on tenfold cross validation on sections 10 to 19 of the WSJ Corpus of the Penn Treebank II by Sabine Buchholz, from which we derived a rough indication. The estimate means that if 100 chunk tags are found, about 50 would be <span class="postag">NP</span> tags and 35 would have an <span class="postag">SBJ</span> relation tag. About 30 of the chunks would be tagged as <span class="postag">NP-SBJ</span>, and 15 as <span class="postag">NP-OBJ</span>. </span></p>
|
||||
<p><span class="small"><span style="text-decoration: underline;">Reference</span>: Buchholz, S. (2002). <em>Memory-Based Grammatical Relation Finding</em>. ILK, Tilburg University.</span></p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,367 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-dev</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-dev" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-dev</a></div>
|
||||
<h1>pattern.dev</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1480" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">Pattern is a web mining module for the Python programming language.</span></p>
|
||||
<p><span class="big">Pattern is written in Python with extensions in JavaScript. The source code is hosted on GitHub. It is licensed under BSD, so it can be freely incorporated in proprietary applications. Contributions and donations are welcomed.</span></p>
|
||||
<p>There are six core modules in the <a href="pattern.html">pattern</a> package: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-text.html">text</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema.gif" alt="" width="620" height="180" /></p>
|
||||
<hr />
|
||||
<h2>Topics</h2>
|
||||
<ul>
|
||||
<li><a href="#contribute">Contributing</a></li>
|
||||
<li><a href="#dependencies">Dependencies</a></li>
|
||||
<li><a href="#documentation">Documentation</a></li>
|
||||
<li><a href="#code">Coding conventions</a></li>
|
||||
<li><a href="#quality">Code quality</a></li>
|
||||
<li><a href="#language">Language support</a></li>
|
||||
</ul>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="contribute"></a>Contribute</h2>
|
||||
<p>The source code is hosted on <a href="https://github.com/clips/pattern" target="_blank">GitHub</a> (see <a class="noexternal link-maintenance" href="http://www.github.com/clips/pattern" target="_blank">http://github.com/clips/pattern</a>). GitHub is an online project hosting service with version control. Version control tracks changes to the source code, i.e., it can be rolled back to an earlier state or merged with revisions from different contributors.</p>
|
||||
<p>To work on Pattern, create a <a href="http://help.github.com/fork-a-repo/" target="_blank">fork</a> of the project, a local copy of the source code that can be edited and updated by you alone. You can manage this copy with the free GitHub application (<a class="noexternal link-maintenance" href="http://windows.github.com/" target="_blank">windows</a> | <a class="noexternal link-maintenance" href="http://mac.github.com/" target="_blank">mac</a>). When you are ready, send us a <a href="http://help.github.com/send-pull-requests/" target="_blank">pull</a> request and we will integrate your changes in the main project.</p>
|
||||
<p>Let us know if you encounter a bug. We prefer if you create an <a href="https://github.com/clips/pattern/issues" target="_blank">issue</a> on GitHub, so that (until fixed) the problem is visible to all users of Pattern. There is a blue button for donations on the main documentation page. Please support the development if you use Pattern commercially.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="dependencies"></a>Dependencies</h2>
|
||||
<p>There are six core modules in the package:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Module</span></td>
|
||||
<td><span class="smallcaps">Functionality</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.web</td>
|
||||
<td>Asynchronous requests, web services, web crawler, HTML DOM parser.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.db</td>
|
||||
<td>Wrappers for databases (MySQL, SQLite) and CSV-files.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.text</td>
|
||||
<td>Base classes for parsers, parse trees and sentiment analysis.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.search</td>
|
||||
<td>Pattern matching algorithm for parsed text (syntax &amp; semantics).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.vector</td>
|
||||
<td>Vector space model, clustering, classification.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>pattern.graph</td>
|
||||
<td>Graph analysis &amp; visualization.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>There are two helper modules: pattern.metrics (statistics) and canvas.js (visualization).</p>
|
||||
<h3>Design philosophy</h3>
|
||||
<p>Pattern is written in Python, with JavaScript extensions for data visualization (graph.js and canvas.js). The package works out of the box. If C/C++ code is bundled for performance (e.g., LIBSVM), it includes precompiled binaries for all major platforms (Windows, Linux, Mac).</p>
|
||||
<p>Pattern modules are standalone. If a module imports another module, it fails silently if that module is not present. For example, pattern.text implements a parser that uses a Perceptron language model when pattern.vector is present, but falls back to a lexicon of known words and rules for unknown words if used by itself. A single module can have a lot of interdependent classes, hence the large __init__.py files.</p>
|
||||
<p>Pattern modules can bundle other BSD-licensed Python projects (e.g., BeautifulSoup). For larger projects or GPL-licensed projects, it provides code to map data structures.</p>
|
||||
<h3>Base classes</h3>
|
||||
<p>In pattern.web, each web service (e.g., Google, Twitter) inherits from <span class="inline_code">SearchEngine</span> and returns <span class="inline_code">Result</span> objects. Each MediaWiki web service (e.g., Wikipedia, Wiktionary) inherits from <span class="inline_code">MediaWiki</span>.</p>
|
||||
<p>In pattern.db, each database engine is wrapped by <span class="inline_code">Database</span>. It supports MySQL and SQLite, with future plans for MongoDB. See <span class="inline_code">Database</span><span class="inline_code">.connect()</span>, <span class="inline_code">escape()</span>, <span class="inline_code">_field_SQL()</span> and <span class="inline_code">_update()</span>.</p>
|
||||
<p>In pattern.text, each language inherits from <span class="inline_code">Parser</span>, having a lexicon of known words and an optional language model. Case studies for <a class="link-maintenance" href="http://www.clips.ua.ac.be/pages/using-wikicorpus-nltk-to-build-a-spanish-part-of-speech-tagger">Spanish</a> and <a class="link-maintenance" href="http://www.clips.ua.ac.be/pages/using-wiktionary-to-build-an-italian-part-of-speech-tagger">Italian</a> show how to train a <span class="inline_code">Lexicon</span>. A bundled pattern.vector example shows how to train a Perceptron <span class="inline_code">Model</span>.</p>
|
||||
<p>In pattern.vector, each classifier inherits from <span class="inline_code">Classifier</span> (e.g., KNN, SVM). Each clustering algorithm is available from <span class="inline_code">Model.cluster()</span>.</p>
|
||||
<p>In pattern.graph, subclasses of <span class="inline_code">Node</span> or <span class="inline_code">Edge</span> can be used with (subclasses of) <span class="inline_code">Graph</span> by setting the <span class="inline_code">base</span> parameter of <span class="inline_code">Graph.add_node()</span> and <span class="inline_code">add_edge()</span>. Each layout algorithm (e.g., force-based springs) inherits from <span class="inline_code">GraphLayout</span>.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="documentation"></a>Documentation</h2>
|
||||
<p>Each function or method has a docstring:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">def find(match=lambda item: False, list=[]):
|
||||
""" Returns the first item in the given list for which match(item) is True.
|
||||
"""
|
||||
for item in list:
|
||||
if match(item) is True:
|
||||
return item</pre></div>
|
||||
<p>The docstring provides a concise description of the type of input and output. In Pattern, a docstring starts with "Returns" (for a function) or "Yields" (for a property). Each function has a unit test, to verify that it is fit for use. Each function has an engaging example, bundled in the package or in the documentation.</p>
|
||||
<p>Pattern does not have a documentation framework. The documentation is written by hand and in constant revision. Please report spelling errors and examples with bugs.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="code"></a>Coding conventions</h2>
|
||||
<h3>Whitespace</h3>
|
||||
<p>The source code is not strict <a href="http://www.python.org/dev/peps/pep-0008/" target="_blank">PEP8</a>. For example, additional whitespace is used so that property assignments or inline comments are vertically aligned as a block:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">class Table(object):
|
||||
def __init__(self, name, database):
|
||||
""" A collection of rows with one or more fields of a certain type.
|
||||
"""
|
||||
self.database = database
|
||||
self.name = name
|
||||
self.fields = [] # List of field names (i.e., column names).
|
||||
self.schema = {} # Dictionary of (field, Schema)-items.
|
||||
self.default = {} # Default values for Table.insert().
|
||||
self.primary_key = None
|
||||
self._update()</pre></div>
|
||||
<p>Whitespace is sometimes used to align dictionary keys and values:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">url = URL('http://search.twitter.com/search.json?', method=GET, query={
|
||||
'q': query,
|
||||
'page': start,
|
||||
'rpp': min(count, 100)
|
||||
})</pre></div>
|
||||
<h3>Class and function names</h3>
|
||||
<p>Single words are preferred for class names. Compound terms use CamelCase, e.g., <span class="inline_code">SearchEngine</span> or <span class="inline_code">AsynchronousRequest</span>. Single, descriptive words are preferred for functions and methods. Compound terms use lowercase_with_underscore. If a method takes no arguments, it is a property:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">class AsynchronousRequest:
|
||||
@property
|
||||
def done(self):
|
||||
return not self._thread.isAlive() # We'd prefer "_thread.alive".</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">while not request.done:
|
||||
... </pre></div>
|
||||
<h3>Variable names</h3>
|
||||
<p>The source code uses single character names abundantly. For example, dictionary <span style="text-decoration: underline;">k</span>eys and <span style="text-decoration: underline;">v</span>alues are <span class="inline_code">k</span> and <span class="inline_code">v</span>, a string is <span class="inline_code">s</span>. This is done to make the structure of the algorithm stand out (i.e., the actual function and method calls):</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">def normalize(s, punctuation='!?.:;,()[] '):
|
||||
s = s.decode('utf-8')
|
||||
s = s.lower()
|
||||
s = s.strip(punctuation)
|
||||
return s</pre></div>
|
||||
<p>Frequently used single character variable names:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="smallcaps">Variable</span></td>
|
||||
<td><span class="smallcaps">Meaning</span></td>
|
||||
<td><span class="smallcaps">Example</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">a</span></td>
|
||||
<td>array, all</td>
|
||||
<td><span class="inline_code">a = [normalize(w) for w in words]</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">b</span></td>
|
||||
<td>boolean</td>
|
||||
<td><span class="inline_code">while b is False:</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">d</span></td>
|
||||
<td>distance, document</td>
|
||||
<td><span class="inline_code">d = distance(v1, v2)</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">e</span></td>
|
||||
<td>element</td>
|
||||
<td><span class="inline_code">e = html.find('#nav')</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">f</span></td>
|
||||
<td>file, filter, function</td>
|
||||
<td><span class="inline_code">f = open('data.csv', 'r')</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">i</span></td>
|
||||
<td>index</td>
|
||||
<td><span class="inline_code">for i in range(len(matrix)):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">j</span></td>
|
||||
<td>index</td>
|
||||
<td><span class="inline_code">for j in range(len(matrix[i])):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">k</span></td>
|
||||
<td>key</td>
|
||||
<td><span class="inline_code">for k in vector.keys():</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">n</span></td>
|
||||
<td>list length</td>
|
||||
<td><span class="inline_code">n = len(a)</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">p</span></td>
|
||||
<td>parser, pattern</td>
|
||||
<td><span class="inline_code">p = pattern.search.compile('NN')</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">q</span></td>
|
||||
<td>query</td>
|
||||
<td><span class="inline_code">for r in twitter.search(q):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">r</span></td>
|
||||
<td>result, row</td>
|
||||
<td><span class="inline_code">for r in csv('data.csv'):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">s</span></td>
|
||||
<td>string</td>
|
||||
<td><span class="inline_code">s = s.decode('utf-8').strip()</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">t</span></td>
|
||||
<td>time</td>
|
||||
<td><span class="inline_code">t = time.time() - t0</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">v</span></td>
|
||||
<td>value, vector</td>
|
||||
<td><span class="inline_code">for k, v in vector.items():</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">w</span></td>
|
||||
<td>word</td>
|
||||
<td><span class="inline_code">for i, w in enumerate(sentence.words):</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">x</span></td>
|
||||
<td>horizontal position</td>
|
||||
<td><span class="inline_code">node.x = 0</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="text-align: center;"><span class="inline_code">y</span></td>
|
||||
<td>vertical position</td>
|
||||
<td><span class="inline_code">node.y = 0</span></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3>Dictionaries</h3>
|
||||
<p>The source code uses dictionaries abundantly. Dictionaries are fast for lookup. For example, pattern.vector represents vectors as sparse feature → weight dictionaries:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">v1 = document1.vector
|
||||
v2 = document2.vector
|
||||
cos = sum(v1.get(w,0) * f for w, f in v2.items()) / (norm(v1) * norm(v2) or 1)</pre></div>
|
||||
<p>Pattern algorithms are <a class="link-maintenance" href="pattern-metrics.html#profile">profiled</a> and optimized with caching mechanisms.</p>
|
||||
<h3>List comprehensions</h3>
|
||||
<p>The source code uses list comprehension abundantly. It is concise, and often faster than <span class="inline_code">map()</span>. However, it can also be harder to read (a comment should be added).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">def words(s, punctuation='!?.:;,()[] '):
|
||||
return [w.strip(punctuation) for w in s.split()]
|
||||
</pre></div>
|
||||
<h3>Ternary operator</h3>
|
||||
<p>Previous versions of Pattern supported Python 2.4, which does not have the ternary operator (single-line if). A part of the source code still uses a boolean condition to emulate it:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">s = s.lower() if lowercase is True else s # Python 2.5+</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">s = lowercase is True and s.lower() or s # Python 2.4</pre></div>
|
||||
<p>With boolean conditions, care must be taken for values <span class="inline_code">0</span>, <span class="inline_code">''</span>, <span class="inline_code">[]</span>, <span class="inline_code">()</span>, <span class="inline_code">{}</span>, and <span class="inline_code">None</span>, since they evaluate as <span class="inline_code">False</span> and trigger the or-clause.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="quality"></a>Code quality</h2>
|
||||
<p>The source code has about 25,000 lines of Python code (25% unit tests), 5,000 lines of JavaScript, and 20,000 lines of bundled dependencies (BeautifulSoup, PDFMiner, PyWordNet, LIBSVM, LIBLINEAR, etc.). To evaluate the code quality, <a href="http://www.logilab.org/857" target="_blank">pylint</a> can be used:</p>
|
||||
<div class="install">
|
||||
<pre class="gutter:false; light:true;">> cd pattern-2.x
|
||||
> pylint pattern --rcfile=.pylintrc</pre></div>
|
||||
<p>Important pylint ids are those starting with <span class="inline_code">E</span> (= possible bugs).</p>
|
||||
<p>The <span class="inline_code">.pylintrc</span> configuration file defines a number of custom settings:</p>
|
||||
<ul>
|
||||
<li>Instead of 80 characters per line, 100 characters are allowed.</li>
|
||||
<li>Ignore pylint id <span class="inline_code">C0103</span>, single-character variable names are allowed.</li>
|
||||
<li>Ignore pylint id <span class="inline_code">W0142</span>, <span class="inline_code">*args</span> and <span class="inline_code">**kwargs</span> are allowed.</li>
|
||||
<li>Ignore bundled dependencies.</li>
|
||||
</ul>
|
||||
<p>The source code scores about 7.38 / 10. A known issue is the absence of docstrings in unit tests.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="language"></a>Language support</h2>
|
||||
<p>Pattern currently has natural language processing tools (e.g., pattern.en, pattern.es) for most languages on the to-do list. There is no sentiment analysis yet for Spanish and German. Chinese is an open task.</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Language</span></td>
|
||||
<td style="text-align: center;"><span class="smallcaps">Code</span></td>
|
||||
<td style="text-align: center;"><span class="smallcaps">Speakers</span></td>
|
||||
<td><span class="smallcaps">Example countries</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mandarin</td>
|
||||
<td style="text-align: center;"><span class="inline_code">cmn</span></td>
|
||||
<td style="text-align: center;">955M</td>
|
||||
<td>China + Taiwan (945), Singapore (3)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>Spanish</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">es</span></td>
|
||||
<td style="text-align: center;">350M</td>
|
||||
<td>Argentina (40), Colombia (40), Mexico (100), Spain (45)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>English</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">en</span></td>
|
||||
<td style="text-align: center;">340M</td>
|
||||
<td>Canada (30), United Kingdom (60), United States (300)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>German</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">de</span></td>
|
||||
<td style="text-align: center;">100M</td>
|
||||
<td>Austria (10), Germany (80), Switzerland (7)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>French</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">fr</span></td>
|
||||
<td style="text-align: center;">70M</td>
|
||||
<td>France (65), Côte d'Ivoire (20)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>Italian</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">it</span></td>
|
||||
<td style="text-align: center;">60M</td>
|
||||
<td>Italy (60)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><s>Dutch</s></td>
|
||||
<td style="text-align: center;"><span class="inline_code">nl</span></td>
|
||||
<td style="text-align: center;">25M</td>
|
||||
<td>The Netherlands (25), Belgium (5), Suriname (1)</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>There are two case studies that demonstrate how to build a pattern.xx language module:</p>
|
||||
<ul>
|
||||
<li><a href="http://www.clips.ua.ac.be/pages/using-wiktionary-to-build-an-italian-part-of-speech-tagger">Using Wiktionary to build an Italian part-of-speech tagger</a></li>
|
||||
<li><a href="http://www.clips.ua.ac.be/pages/using-wikicorpus-nltk-to-build-a-spanish-part-of-speech-tagger">Using Wikicorpus &amp; NLTK to build a Spanish part-of-speech tagger</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,590 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-fr</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-fr" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-fr</a></div>
|
||||
<h1>pattern.fr</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1697" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">The pattern.fr module contains a fast part-of-speech tagger for French (identifies nouns, adjectives, verbs, etc. in a sentence), sentiment analysis, and tools for French verb conjugation and noun singularization & pluralization.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema_fr.gif" alt="" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<p>The functions in this module take the same parameters and return the same values as their counterparts in <a href="pattern-en.html">pattern.en</a>. Refer to the documentation there for more details. </p>
|
||||
<h3>Noun singularization & pluralization</h3>
|
||||
<p>For French nouns there is <span class="inline_code">singularize()</span> and <span class="inline_code">pluralize()</span>. The implementation uses a statistical approach with 93% accuracy for singularization and 92% for pluralization.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import singularize, pluralize
|
||||
>>>
|
||||
>>> print singularize('chats')
|
||||
>>> print pluralize('chat')
|
||||
|
||||
chat
|
||||
chats </pre></div>
|
||||
<h3>Verb conjugation</h3>
|
||||
<p>For French verbs there is <span class="inline_code">conjugate()</span>, <span class="inline_code">lemma()</span>, <span class="inline_code">lexeme()</span> and <span class="inline_code">tenses()</span>. The lexicon for verb conjugation contains about 1,750 common French verbs (constructed with Bob Salita's verb conjugation rules). For unknown verbs it will fall back to regular expressions with an accuracy of about 83%. </p>
|
||||
<p>French verbs have more tenses than English verbs. In particular, the plural differs for each person, and there are additional forms for the <span class="inline_code">FUTURE</span> tense, the <span class="inline_code">IMPERATIVE</span>, <span class="inline_code">CONDITIONAL</span> and <span class="inline_code">SUBJUNCTIVE</span> mood and the <span class="inline_code">PERFECTIVE</span> aspect:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import conjugate
|
||||
>>> from pattern.fr import INFINITIVE, PRESENT, PAST, SG, SUBJUNCTIVE, PERFECTIVE
|
||||
>>>
|
||||
>>> print conjugate('suis', INFINITIVE)
|
||||
>>> print conjugate('suis', PRESENT, 1, SG, mood=SUBJUNCTIVE)
|
||||
>>> print conjugate('suis', PAST, 3, SG)
|
||||
>>> print conjugate('suis', PAST, 3, SG, aspect=PERFECTIVE)
|
||||
|
||||
être
|
||||
sois
|
||||
était
|
||||
fut </pre></div>
|
||||
<p>For <span class="inline_code">PAST</span> tense + <span class="inline_code">PERFECTIVE</span> aspect we can also use <span class="inline_code">PRETERITE</span> (<em>passé simple</em>). For <span class="inline_code">PAST</span> tense + <span class="inline_code">IMPERFECTIVE</span> aspect we can also use <span class="inline_code">IMPERFECT</span> (<em>imparfait</em>):</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import conjugate
|
||||
>>> from pattern.fr import IMPERFECT, PRETERITE
|
||||
>>>
|
||||
>>> print conjugate('suis', IMPERFECT, 3, SG)
|
||||
>>> print conjugate('suis', PRETERITE, 3, SG)
|
||||
|
||||
était
|
||||
fut </pre></div>
|
||||
<p> The <span class="inline_code">conjugate()</span> function takes the following optional parameters:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="smallcaps">Tense</td>
|
||||
<td class="smallcaps">Person</td>
|
||||
<td class="smallcaps">Number</td>
|
||||
<td class="smallcaps">Mood</td>
|
||||
<td class="smallcaps">Aspect</td>
|
||||
<td class="smallcaps">Alias</td>
|
||||
<td class="smallcaps">Example</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">INFINITIVE</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">"inf"</td>
|
||||
<td><em>être</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">suis</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">es</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">est</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">sommes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">êtes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">sont</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"part"</td>
|
||||
<td><em>étant</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg!"</td>
|
||||
<td><em>sois</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl!"</td>
|
||||
<td><em>soyons</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl!"</td>
|
||||
<td><em>soyez</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg->"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">serais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg->"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">serais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg->"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">serait</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl->"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">serions</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl->"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">seriez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl->"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">seraient</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg?"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">sois</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sois</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg?"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">soit</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl?"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">soyons</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl?"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">soyez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl?"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">soient</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp"</td>
|
||||
<td><em>j' <span style="text-decoration: underline;">étais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">étais</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">était</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">étions</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">étiez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">étaient</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"ppart"</td>
|
||||
<td><em>été</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp+"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">fus</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp+"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fus</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp+"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">fut</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl+"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">fûmes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl+"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">fûtes</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl+"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">furent</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp?"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">fusse</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fusses</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp?"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">fût</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl?"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">fussions</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl?"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">fussiez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl?"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">fussent</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgf"</td>
|
||||
<td><em>je <span style="text-decoration: underline;">serai</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgf"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">seras</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgf"</td>
|
||||
<td><em>il <span style="text-decoration: underline;">sera</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1plf"</td>
|
||||
<td><em>nous <span style="text-decoration: underline;">serons</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2plf"</td>
|
||||
<td><em>vous <span style="text-decoration: underline;">serez</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3plf"</td>
|
||||
<td><em>ils <span style="text-decoration: underline;">seront</span></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Instead of optional parameters, a single short alias, or <span class="inline_code">PARTICIPLE</span> or <span class="inline_code">PAST+PARTICIPLE</span> can also be given. With no parameters, the infinitive form of the verb is returned.</p>
|
||||
<p class="small"><span style="text-decoration: underline;">Reference</span><span>: Salita, B. (2011). <em>French Verb Conjugation Rules</em>. Retrieved from: </span><span><a class="noexternal" style="color: inherit;" href="http://fvcr.sourceforge.net/" target="_blank">http://fvcr.sourceforge.net</a>.</span></p>
|
||||
<h3>Attributive & predicative adjectives </h3>
|
||||
<p>French adjectives inflect with an <span class="inline_code">-e</span>, <span class="inline_code">-s</span> or <span class="inline_code">-es</span> suffix depending on gender and number. There are many irregular cases (e.g., <em>curieux</em> → <em>une fille curieuse</em>). You can get the base form with the <span class="inline_code">predicative()</span> function. A statistical approach is used with an accuracy of 95%.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import predicative
|
||||
>>> print predicative('curieuse')
|
||||
|
||||
curieux </pre></div>
|
||||
<h3>Sentiment analysis</h3>
|
||||
<p class="example">For opinion mining there is <span class="inline_code">sentiment()</span>, which returns a (<span class="inline_code">polarity</span>, <span class="inline_code">subjectivity</span>)-tuple, based on a lexicon of adjectives. Polarity is a value between <span class="inline_code">-1.0</span> and <span class="inline_code">+1.0</span>, subjectivity between <span class="inline_code">0.0</span> and <span class="inline_code">1.0</span>. The accuracy is around 74% (P 0.77, R 0.73) for book reviews:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import sentiment
|
||||
>>> print sentiment('Un livre magnifique!')
|
||||
|
||||
(1.0, 1.0) </pre></div>
|
||||
<h3>Parser</h3>
|
||||
<p>For parsing there is <span class="inline_code">parse()</span>, <span class="inline_code">parsetree()</span> and <span class="inline_code">split()</span>. The <span class="inline_code">parse()</span> function annotates words in the given string with their part-of-speech <a class="link-maintenance" href="mbsp-tags.html">tags</a> (e.g., <span class="postag">NN</span> for nouns and <span class="postag">VB</span> for verbs). The <span class="inline_code">parsetree()</span> function takes a string and returns a tree of nested objects (<span class="inline_code">Text</span> → <span class="inline_code">Sentence</span> → <span class="inline_code">Chunk</span> → <span class="inline_code">Word</span>). The <span class="inline_code">split()</span> function takes the output of <span class="inline_code">parse()</span> and returns a <span class="inline_code">Text</span>. See the <span class="inline_code">pattern.en</span> documentation (<span class="link-maintenance" style="color: #78aaff;"><a style="color: #8caaff; outline-style: none !important; outline-width: initial !important; outline-color: initial !important;" href="pattern-en.html#tree">here</a></span>) how to manipulate <span class="inline_code">Text</span> objects. </p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.fr import parse, split
|
||||
>>>
|
||||
>>> s = parse(u"Le chat noir s'était assis sur le tapis.")
|
||||
>>> for sentence in split(s):
|
||||
>>> print sentence
|
||||
|
||||
Sentence('Le/DT/B-NP/O chat/NN/I-NP/O noir/JJ/I-NP/O'
|
||||
"s'/PRP/B-NP/O était/VB/B-VP/O assis/VBN/I-VP/O"
|
||||
'sur/IN/B-PP/B-PNP le/DT/B-NP/I-PNP tapis/NN/I-NP/I-PNP ././O/O')
|
||||
</pre></div>
|
||||
<p>The parser is based on <a href="http://alpage.inria.fr/~sagot/lefff-en.html">Le<em>fff</em></a>. For words in Le<em>fff</em> that can have multiple part-of-speech tags, we used <a href="http://www.lexique.org/">Lexique</a> to find the most frequent POS-tag. </p>
|
||||
<p class="small"><span style="text-decoration: underline;">References</span>: </p>
|
||||
<p class="small">Sagot, B. (2010). The Le<em>fff</em>, a freely available and large-coverage morphological and syntactic lexicon for French. <em>Proceedings of LREC'10</em>.</p>
|
||||
<p class="small">New, B., Pallier, C., Ferrand, L. & Matos, R. (2001). A lexical database for contemporary french: LEXIQUE. <em>L'année Psychologique</em>. </p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,431 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-graph</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-graph" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-graph</a></div>
|
||||
<h1>pattern.graph</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1392" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p class="big"><span style="font-size: 16px;">The pattern.graph module has tools for graph analysis (shortest path, centrality) and graph visualization in the browser. A graph is a network of nodes connected by edges. It can be used for example to study social networks or to model semantic relationships between concepts.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | graph.</p>
|
||||
<p><img style="border: 0px initial initial;" src="../g/pattern_schema.gif" alt="" width="620" height="180" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<ul>
|
||||
<li><a href="#node">Node</a></li>
|
||||
<li><a href="#edge">Edge</a></li>
|
||||
<li><a href="#graph">Graph</a></li>
|
||||
<li><a href="#layout">Graph layout</a></li>
|
||||
<li><a href="#utility">Graph adjacency</a></li>
|
||||
<li><a href="#canvas">Visualization</a> <span class="link-maintenance">(</span><a class="link-maintenance" href="#canvas"><span class="smallcaps link-maintenance">export</span></a><span class="link-maintenance">)</span></li>
|
||||
<li><a href="#javascript">graph.js</a></li>
|
||||
</ul>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="node"></a>Node</h2>
|
||||
<p>A <span class="inline_code">Node</span> is an element with a unique id (a string or <span class="inline_code">int</span>) in a graph. A graph is a network of nodes and edges (connections between nodes). For example, the World Wide Web (WWW) can be represented as a vast graph with websites as nodes, website URLs as node id's, and hyperlinks as edges. Graph analysis can then be used to find important nodes (i.e., popular websites) and the shortest path between them.</p>
|
||||
<p>A <span class="inline_code">Node</span> takes a number of optional parameters used to style the <a class="link-maintenance" href="#canvas">visualization</a> of the graph: <span class="inline_code">radius</span> (node size), <span class="inline_code">text</span>, <span class="inline_code">fill</span> and <span class="inline_code">stroke</span> (colors; each a tuple of <a href="http://en.wikipedia.org/wiki/RGBA">RGBA</a> values between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>), <span class="inline_code">strokewidth</span>, <span class="inline_code">font</span>, <span class="inline_code">fontsize</span> and <span class="inline_code">fontweight</span>.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">node = Node(id="", **kwargs)</pre><pre class="brush:python; gutter:false; light:true;">node.graph # Parent Graph.
|
||||
node.id # Unique string or int.
|
||||
node.links # List of Node objects.
|
||||
node.edges # List of Edge objects.
|
||||
node.edge(node, reverse=False)
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">node.weight # Eigenvector centrality (0.0-1.0).
|
||||
node.centrality # Betweenness centrality (0.0-1.0).
|
||||
node.degree # Degree centrality (0.0-1.0). </pre><pre class="brush:python; gutter:false; light:true;">node.x # 2D horizontal offset.
|
||||
node.y # 2D vertical offset.
|
||||
node.force # 2D Vector, updated by Graph.layout.
|
||||
node.radius # Default: 5
|
||||
node.fill # Default: None
|
||||
node.stroke # Default: (0,0,0,1)
|
||||
node.strokewidth # Default: 1
|
||||
node.text # Text object, or None.</pre><pre class="brush:python; gutter:false; light:true;">node.flatten(depth=1, traversable=lambda node, edge: True)
|
||||
</pre><ul>
|
||||
<li><span class="inline_code">Node.edge(node)</span> returns the <span class="inline_code">Edge</span> from this node to the given <span class="inline_code">node</span>, or <span class="inline_code">None</span>.</li>
|
||||
<li><span class="inline_code">Node.flatten()</span> returns a list with the node itself (<span class="inline_code">depth=0</span>), directly connected nodes (<span class="inline_code">depth=1</span>), nodes connected to those nodes (<span class="inline_code">depth=2</span>), and so on.</li>
|
||||
</ul>
|
||||
<p><span class="smallcaps">node weight and centrality</span></p>
|
||||
<p>A well-known task in graph analysis is measuring how important or <em>central</em> each node in the graph is. The pattern.graph module has three centrality measurements, adopted from <a href="http://networkx.lanl.gov/">NetworkX</a>.</p>
|
||||
<p><span class="inline_code">Node.weight</span> is the node's <em>eigenvector</em> centrality (= incoming traffic) as a value between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>. Nodes with more (indirect) incoming edges have a higher weight. For example, in the WWW, popular websites are those that are often linked to, where the popularity of the referring websites is taken into account.</p>
|
||||
<p><span class="inline_code">Node.centrality</span> is the node's <em>betweenness</em> centrality (= passing traffic) as a value between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>. Nodes that occur more frequently in paths between other nodes have a higher betweenness. They are often found at the intersection of different clusters of nodes (e.g., like a broker or a bridge).</p>
|
||||
<p><span class="inline_code">Node.degree</span> is the node's <em>degree</em> centrality (= local traffic) as a value between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>. Nodes with more edges have a higher degree.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="edge"></a>Edge</h2>
|
||||
<p>An <span class="inline_code">Edge</span> is a connection between two nodes. Its <span class="inline_code">weight</span> defines the importance of the connection. Edges with a higher weight are preferred when traversing the path between two (indirectly) connected nodes.</p>
|
||||
<p>An <span class="inline_code">Edge</span> takes optional parameters <span class="inline_code">stroke</span> (a tuple of <a href="http://en.wikipedia.org/wiki/RGBA">RGBA</a> values between <span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>) and <span class="inline_code">strokewidth</span>, which can be used to style the graph <a class="link-maintenance" href="#canvas">visualization</a>.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">edge = Edge(node1, node2, weight=0.0, length=1.0, type=None, **kwargs)</pre><pre class="brush:python; gutter:false; light:true;">edge.node1 # Node (sender).
|
||||
edge.node2 # Node (receiver).
|
||||
edge.weight # Connection strength.
|
||||
edge.length # Length modifier for the visualization.
|
||||
edge.type # Useful in semantic networks.
|
||||
edge.stroke # Default: (0,0,0,1)
|
||||
edge.strokewidth # Default: 1 </pre><p class="smallcaps"><br />directed graph</p>
|
||||
<p>An edge can be traversed in both directions: from <span class="inline_code">node1</span> → <span class="inline_code">node2</span>, and from <span class="inline_code">node2</span> → <span class="inline_code">node1</span>. The <span class="inline_code">Graph.shortest_path()</span> and <span class="inline_code">Graph.betweenness_centrality()</span> methods have a <span class="inline_code">directed</span> parameter which can be set to <span class="inline_code">True</span>, so that edges are only traversed from <span class="inline_code">node1</span> → <span class="inline_code">node2</span>. This is called a directed graph. Evidently, it produces different shortest paths and node weights.</p>
|
||||
<p>Two nodes can be connected by at most two edges (one in each direction). Otherwise, <span class="inline_code">Graph.add_edge()</span> simply returns the edge that already exists between the given nodes.</p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="graph"></a>Graph</h2>
|
||||
<p>A <span class="inline_code">Graph</span> is a network of nodes connected by edges, with methods for finding paths between (indirectly) connected nodes.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">graph = Graph(layout=SPRING, distance=10.0)</pre><pre class="brush:python; gutter:false; light:true;">graph[id] # Node with given id (Graph is a subclass of dict).
|
||||
graph.nodes # List of Node objects.
|
||||
graph.edges # List of Edge objects.
|
||||
graph.density # < 0.35 => sparse, > 0.65 => dense
|
||||
graph.layout # GraphSpringLayout.
|
||||
graph.distance # GraphSpringLayout spacing.
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">graph.add_node(id) # Creates + returns new Node.
|
||||
graph.add_edge(id1, id2) # Creates + returns new Edge.
|
||||
graph.remove(node) # Removes given Node + edges.
|
||||
graph.remove(edge) # Removes given Edge.
|
||||
graph.prune(depth=0) # Removes nodes + edges if len(node.links) <= depth.
|
||||
graph.node(id) # Returns node with given id.
|
||||
graph.edge(id1, id2) # Returns edge connecting the given nodes.
|
||||
graph.copy(nodes=ALL) # Returns a new Graph.
|
||||
graph.split() # Returns a list of (unconnected) graphs.
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">graph.eigenvector_centrality() # Updates all Node.weight values.
|
||||
graph.betweenness_centrality() # Updates all Node.centrality values. </pre><pre class="brush:python; gutter:false; light:true;">graph.shortest_path(node1, node2, heuristic=None, directed=False)
|
||||
graph.shortest_paths(node, heuristic=None, directed=False)
|
||||
graph.paths(node1, node2, length=4)
|
||||
graph.fringe(depth=0, traversable=lambda node, edge: True)
|
||||
</pre><pre class="brush:python; gutter:false; light:true;">graph.update(iterations=10, weight=10, limit=0.5)</pre><ul>
|
||||
<li><span class="inline_code"><span><span class="inline_code">Graph.add_node()</span></span></span> takes an id + any optional parameter of <span><span class="inline_code">Node</span></span>.</li>
|
||||
<li><span class="inline_code">Graph.add_edge()</span> takes two id's + any optional parameter of <span class="inline_code">Edge</span>.<br />Both methods have an optional <span class="inline_code">base</span> parameter that defines the subclass of <span class="inline_code">Node</span> or <span class="inline_code">Edge</span> to use.</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><span class="inline_code">Graph.prune()</span> removes all nodes whose number of (undirected) connections is less than or equal to <span class="inline_code">depth</span>.</li>
|
||||
<li><span class="inline_code">Graph.copy()</span> returns a new <span class="inline_code">Graph</span> from the given list of nodes.</li>
|
||||
<li><span class="inline_code">Graph.split()</span> returns a list of unconnected subgraphs.</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><span class="inline_code"><span><span class="inline_code">Graph.paths()</span></span></span> returns all paths (each a list of nodes) <= <span class="inline_code">length</span> connecting two given nodes.</li>
|
||||
<li><span class="inline_code"><span><span class="inline_code">Graph.shortest_path()</span></span></span> returns a list of nodes connecting the two given nodes<span class="inline_code"><span>.</span><br /></span></li>
|
||||
<li><span class="inline_code">Graph.shortest_paths()</span> returns a dictionary of node <span style="line-height: normal;">→</span> shortest path.<br />The optional <span class="inline_code">heuristic</span> function takes two node id's and returns a penalty (<span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>) for traversing their edges. With <span class="inline_code">directed=True</span>, edges are only traversable in one direction.</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li><span class="inline_code">Graph.fringe()</span> returns a list of <em>leaf</em> nodes.<br />With <span class="inline_code">depth=0</span>, returns the nodes with one edge.<br />With <span class="inline_code">depth=1</span>, returns the nodes with one edge + the connected nodes, etc.</li>
|
||||
</ul>
|
||||
<p>For example:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.graph import Graph
|
||||
>>>
|
||||
>>> g = Graph()
|
||||
>>> for n1, n2 in (
|
||||
>>> ('cat', 'tail'), ('cat', 'purr'), ('purr', 'sound'),
|
||||
>>> ('dog', 'tail'), ('dog', 'bark'), ('bark', 'sound')):
|
||||
>>> g.add_node(n1)
|
||||
>>> g.add_node(n2)
|
||||
>>> g.add_edge(n1, n2, weight=0.0, type='is-related-to')
|
||||
>>>
|
||||
>>> for n in sorted(g.nodes, key=lambda n: n.weight):
|
||||
>>> print '%.2f' % n.weight, n
|
||||
|
||||
0.00 Node(id='cat')
|
||||
0.00 Node(id='dog')
|
||||
0.07 Node(id='purr')
|
||||
0.07 Node(id='bark')
|
||||
0.15 Node(id='tail')
|
||||
1.00 Node(id='sound')
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> for n in g.shortest_path('purr', 'bark'):
|
||||
>>> print n
|
||||
|
||||
Node(id='purr')
|
||||
Node(id='sound')
|
||||
Node(id='bark')
|
||||
</pre></div>
|
||||
<table border="0">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>
|
||||
<p>When sorted by <span class="inline_code">Node.weight</span> (i.e., eigenvector centrality), <em>sound</em> is the most important node in the network. This can be explained by observing the visualization on the right. Most nodes (indirectly) connect to <em>sound</em> or <em>tail</em>. No nodes connect to <em>dog</em> or <em>cat</em>, so these are the least important in the network (weight <span class="inline_code">0.0</span>).</p>
|
||||
<p>By default, nodes with a higher weight will have a larger radius in the visualization.</p>
|
||||
</td>
|
||||
<td><img src="../g/pattern_graph3.jpg" alt="" width="170" height="155" /></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="layout"></a>Graph layout</h2>
|
||||
<p>A <span class="inline_code">GraphLayout</span> updates node positions (<span class="inline_code">Node.x</span>, <span class="inline_code">Node.y</span>) iteratively each time <span class="inline_code">GraphLayout.update()</span> is called. The pattern.graph module currently has one implementation: <span class="inline_code">GraphSpringLayout</span>, which uses a force-based algorithm where edges are regarded as springs. Connected nodes are pulled closer together (attraction) while other nodes are pushed further apart (repulsion).</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">layout = GraphSpringLayout(graph)</pre><pre class="brush:python; gutter:false; light:true;">layout.graph # Graph owner.
|
||||
layout.iterations # Starts at 0, +1 each update().
|
||||
layout.bounds # (x, y, width, height)-tuple.</pre><pre class="brush:python; gutter:false; light:true;">layout.k # Force constant (4.0)
|
||||
layout.force # Force multiplier (0.01)
|
||||
layout.repulsion # Maximum repulsion radius (50)</pre><pre class="brush:python; gutter:false; light:true;">layout.update(weight=10.0, limit=0.5) # weight = Edge.weight multiplier.
|
||||
layout.reset()
|
||||
layout.copy(graph)</pre><p><span class="small"><span style="text-decoration: underline;">Reference</span>: Hellesoy, A. & Hoover, D. (2006). http://ajaxian.com/archives/new-javascriptcanvas-graph-library</span></p>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="utility"></a>Graph adjacency</h2>
|
||||
<p>The pattern.graph module has a number of functions that can be used to modify graph edges:</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">unlink(graph, node1, node2=None)</pre><pre class="brush:python; gutter:false; light:true;">redirect(graph, node1, node2)</pre><pre class="brush:python; gutter:false; light:true;">cut(graph, node)</pre><pre class="brush:python; gutter:false; light:true;">insert(graph, node, a, b)</pre><ul>
|
||||
<li style="margin-bottom: 0.3em;"><span class="inline_code">unlink()</span> removes the edge between <span class="inline_code">node1</span> and <span class="inline_code">node2</span>. <br />If only <span class="inline_code">node1</span> is given, removes all edges to + from it. This does not remove <span class="inline_code">node1</span> from the graph.</li>
|
||||
<li style="margin-bottom: 0.3em;"><span class="inline_code">redirect()</span> connects <span class="inline_code">node1</span>'s edges to <span class="inline_code">node2</span> and removes <span class="inline_code">node1</span>.<br />If <span class="inline_code">A</span>, <span class="inline_code">B</span>, <span class="inline_code">C</span>, <span class="inline_code">D</span> are nodes and <span class="inline_code">A</span> → <span class="inline_code">B</span> and <span class="inline_code">C</span> → <span class="inline_code">D</span>, and we redirect <span class="inline_code">A</span> to <span class="inline_code">C</span>, then <span class="inline_code">C</span> → <span class="inline_code">B</span> and <span class="inline_code">C</span> → <span class="inline_code">D</span>.</li>
|
||||
<li style="margin-bottom: 0.3em;"><span class="inline_code">cut()</span> removes the given <span class="inline_code">node</span> and connects the surrounding nodes. <br />If <span class="inline_code">A</span>, <span class="inline_code">B</span>, <span class="inline_code">C</span>, <span class="inline_code">D</span> are nodes and <span class="inline_code">A</span> <span>→</span> <span class="inline_code">B</span> and <span class="inline_code">B</span> <span>→</span> <span class="inline_code">C</span> and <span class="inline_code">B</span> <span>→</span> <span class="inline_code">D</span>, and we cut <span class="inline_code">B</span>, then <span class="inline_code">A</span> <span>→</span> <span class="inline_code">C</span> and <span class="inline_code">A</span> <span>→</span> <span class="inline_code">D</span>.</li>
|
||||
<li><span class="inline_code">insert()</span> inserts the given <span class="inline_code">node</span> between node <span class="inline_code">a</span> and node <span class="inline_code">b</span>. <br />If <span class="inline_code">A</span>, <span class="inline_code">B</span>, <span class="inline_code">C</span> are nodes and <span class="inline_code">A</span> <span>→</span> <span class="inline_code">B</span>, and we insert <span class="inline_code">C</span>, then <span class="inline_code">A</span> <span>→</span> <span class="inline_code">C</span> and <span class="inline_code">C</span> <span>→</span> <span class="inline_code">B</span>.</li>
|
||||
</ul>
|
||||
<h3>Edge adjacency map</h3>
|
||||
<p><span style="font-variant: normal;">The <span class="inline_code">adjacency()</span> function returns a map of linked nodes:</span><span class="smallcaps"><br /></span></p>
|
||||
<pre class="brush:python; gutter:false; light:true;">adjacency(graph,
|
||||
directed = False,
|
||||
reversed = False,
|
||||
stochastic = False,
|
||||
heuristic = lambda node1, node2: 0)</pre><p>The return value is an <span class="inline_code">{id1:</span> <span class="inline_code">{id2:</span> <span class="inline_code">weight}}</span> dictionary with <span class="inline_code">Node.id</span>'s as keys, where each value is a dictionary of connected <span class="inline_code">Node.id</span>'s <span style="line-height: 18px;">→</span> <span class="inline_code">Edge.weight</span>.</p>
|
||||
<p>If <span class="inline_code">directed=True</span>, edges are only traversable in one direction. If <span class="inline_code">stochastic=True</span>, the edge weights for all neighbors of a given node sum to <span class="inline_code">1.0</span>. The optional <span class="inline_code">heuristic</span> function takes two node id's and returns an additional cost (<span class="inline_code">0.0</span>-<span class="inline_code">1.0</span>) for traversing their edges. </p>
|
||||
<h3>Edge traversal</h3>
|
||||
<p>The <span class="inline_code">bfs()</span> function (breadth-first search) visits all nodes connected to the given <span class="inline_code">node</span>. <br />The <span class="inline_code">dfs()</span> function (depth-first search) visits all nodes connected to the given <span class="inline_code">node</span> depth-first, i.e., as far as possible along each path before backtracking.</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">bfs(node, visit=lambda node: False, traversable=lambda node, edge: True)</pre><pre class="brush:python; gutter:false; light:true;">dfs(node, visit=lambda node: False, traversable=lambda node, edge: True)
|
||||
</pre><p>The given <span class="inline_code">visit</span> function is called with each visited node. Traversal will stop if it returns <span class="inline_code">True</span>, and subsequently <span class="inline_code">bfs()</span> or <span class="inline_code">dfs()</span> will return <span class="inline_code">True</span>.</p>
|
||||
<p>The given <span class="inline_code">traversable</span> function takes the visited <span class="inline_code">Node</span> and an <span class="inline_code">Edge</span> and returns <span class="inline_code">True</span> if we are allowed to follow this connection to the next node. For example, the traversable for directed edges:</p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> def directed(node, edge):
|
||||
>>> return node.id == edge.node1.id
|
||||
>>>
|
||||
>>> dfs(g, traversable=directed) </pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="canvas"></a>Visualization</h2>
|
||||
<p>The pattern.graph module has a JavaScript counterpart (graph.js) that can be used to visualize a graph in a web page, as an HTML &lt;canvas&gt; element. The HTML &lt;canvas&gt; element allows dynamic, scriptable rendering of 2D shapes and bitmap images (see also Pattern's <a class="link-maintenance" href="pattern-canvas.html">canvas.js</a>).</p>
|
||||
<p><span class="inline_code">Graph.export()</span> creates a new folder at the given <span class="inline_code">path</span> with an index.html (the visualization), a style.css, graph.js and canvas.js. The optional parameter <span class="inline_code">javascript</span> defines the URL path to graph.js and canvas.js (which will not be included in this case).</p>
|
||||
<pre class="brush:python; gutter:false; light:true;">graph.export(path, encoding='utf-8', **kwargs)</pre><div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.graph import Graph
|
||||
>>>
|
||||
>>> g = Graph()
|
||||
>>> for n1, n2 in (
|
||||
>>> ('cat', 'tail'), ('cat', 'purr'), ('purr', 'sound'),
|
||||
>>> ('dog', 'tail'), ('dog', 'bark'), ('bark', 'sound')):
|
||||
>>> g.add_node(n1)
|
||||
>>> g.add_node(n2)
|
||||
>>> g.add_edge(n1, n2, weight=0.0, type='is-related-to')
|
||||
>>>
|
||||
>>> g.export('sound', directed=True)</pre></div>
|
||||
<p>Nodes and edges will be styled according to their <span class="inline_code">fill</span>, <span class="inline_code">stroke</span>, and <span class="inline_code">strokewidth</span> properties.</p>
|
||||
<p>The following parameters can be used to customize the visualization:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><span class="smallcaps">Parameter</span></td>
|
||||
<td><span class="smallcaps">Default</span></td>
|
||||
<td><span class="smallcaps">Description</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">javascript</span></td>
|
||||
<td><span class="inline_code">''</span></td>
|
||||
<td>Path to canvas.js and graph.js.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">stylesheet</span></td>
|
||||
<td class="inline_code"><span class="inline_code">INLINE</span></td>
|
||||
<td>Path to CSS: <span class="inline_code">INLINE</span>, <span class="inline_code">DEFAULT</span> (generates style.css), <span class="inline_code">None</span> or path.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">title</span></td>
|
||||
<td><span class="inline_code">'Graph'</span></td>
|
||||
<td>HTML <span class="inline_code"><span><span class="inline_code"><title>Graph</title></span>.</span></span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">id</span></td>
|
||||
<td><span class="inline_code">'graph'</span></td>
|
||||
<td>HTML <span class="inline_code"><div</span> <span class="inline_code">id="graph"></span> contains the <span class="inline_code"><canvas></span>.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">ctx</span></td>
|
||||
<td><span class="inline_code">'canvas.element'</span></td>
|
||||
<td>HTML <span class="inline_code"><canvas></span> element to use for drawing.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">width</span></td>
|
||||
<td><span class="inline_code">700</span></td>
|
||||
<td>Canvas width in pixels.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">height</span></td>
|
||||
<td><span class="inline_code">500</span></td>
|
||||
<td>Canvas height in pixels.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">frames</span></td>
|
||||
<td><span class="inline_code">500</span></td>
|
||||
<td>Number of frames of animation.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">ipf</span></td>
|
||||
<td><span class="inline_code">2</span></td>
|
||||
<td><span class="inline_code">GraphLayout.update()</span> iterations per frame.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">directed</span></td>
|
||||
<td><span class="inline_code">False</span></td>
|
||||
<td>Visualize eigenvector centrality as an edge arrow?</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">weighted</span></td>
|
||||
<td><span class="inline_code">False</span></td>
|
||||
<td>Visualize betweenness centrality as a node shadow?</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">pack</span></td>
|
||||
<td><span class="inline_code">True</span></td>
|
||||
<td>Shorten leaf edges + add node weight to node radius.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">distance</span></td>
|
||||
<td><span class="inline_code">graph.distance</span></td>
|
||||
<td>Average edge length.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">k</span></td>
|
||||
<td><span class="inline_code">graph.k</span></td>
|
||||
<td>Force constant.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">force</span></td>
|
||||
<td><span class="inline_code">graph.force</span></td>
|
||||
<td>Force dampener.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">repulsion</span></td>
|
||||
<td><span class="inline_code">graph.repulsion</span></td>
|
||||
<td>Force radius.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border: 0; font-size: 0.5em;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">href</span></td>
|
||||
<td><span class="inline_code">{}</span></td>
|
||||
<td>Dictionary of <span class="inline_code">Node.id</span> => URL.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><span class="inline_code">css</span></td>
|
||||
<td><span class="inline_code">{}</span></td>
|
||||
<td>Dictionary of <span class="inline_code">Node.id</span> => CSS classname.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>To export a static visualization, use <span class="inline_code">frames=1</span> and <span class="inline_code">ipf=0</span>.<br /> </p>
|
||||
<p class="smallcaps">Server-side scripting</p>
|
||||
<p><span class="inline_code">Graph.serialize()</span> returns a string with (a portion of) the HTML, CSS and JavaScript source code of the visualization. It can be used to serve a dynamic web page. With <span class="inline_code">type=CANVAS</span>, it returns an HTML string with a <span class="inline_code">&lt;div</span> <span class="inline_code">id="graph"&gt;</span> that contains the canvas.js animation. With <span class="inline_code">type=DATA</span>, it returns a JavaScript string that initializes the <span class="inline_code">Graph</span> in variable <span class="inline_code">g</span> (which will draw to <span class="inline_code">ctx</span>).</p>
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">graph.serialize(type=HTML, **kwargs) # HTML | CSS | CANVAS | DATA</pre><div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> import cherrypy
|
||||
>>>
|
||||
>>> class Visualization(object):
|
||||
>>> def index(self):
|
||||
>>> return (
|
||||
>>> '<html>'
|
||||
>>> '<head>'
|
||||
>>> '<script src="canvas.js"></script>'
|
||||
>>> '<script src="graph.js"></script>'
|
||||
>>> '</head>'
|
||||
>>> '<body>' + g.serialize(CANVAS, directed=True) +
|
||||
>>> '</body>'
|
||||
>>> '</html>'
|
||||
>>> )
|
||||
>>> index.exposed = True
|
||||
>>>
|
||||
>>> cherrypy.quickstart(Visualization())</pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2><a name="javascript"></a>graph.js</h2>
|
||||
<p>Below is a standalone demonstration of graph.js, without using <span class="inline_code">export()</span> or canvas.js. The <span class="inline_code">Graph.loop()</span> method fires the spring layout algorithm (<span class="link-maintenance"><a href="http://www.clips.ua.ac.be/media/pattern-graph/random" target="_blank">view live demo</a></span>).</p>
|
||||
<p><img class="border" src="../g/pattern_graph4.jpg" alt="" width="610" height="390" /></p>
|
||||
<div class="example">
|
||||
<pre class="brush:xml; gutter:false; light:true;"><!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<style>
|
||||
#graph { display: block; position: relative; overflow: hidden; }
|
||||
#graph .node-label { font: 11px sans-serif; }
|
||||
</style>
|
||||
<script src="graph.js"></script>
|
||||
<script>
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush: jscript;gutter: false; light: true; fontsize: 100; first-line: 1; "> function spring() {
|
||||
SHADOW = 0.65 // slow...
|
||||
g = new Graph(document.getElementById("_ctx"));
|
||||
// Random nodes.
|
||||
for (var i=0; i < 50; i++) {
|
||||
g.addNode(i+1);
|
||||
}
|
||||
// Random edges.
|
||||
for (var j=0; j < 75; j++) {
|
||||
var n1 = choice(g.nodes);
|
||||
var n2 = choice(g.nodes);
|
||||
g.addEdge(n1, n2, {weight: Math.random()});
|
||||
}
|
||||
g.prune(0);
|
||||
g.betweennessCentrality();
|
||||
g.eigenvectorCentrality();
|
||||
g.loop({frames:500, fps:30, ipf:2, weighted:0.5, directed:true});
|
||||
}
|
||||
</pre></div>
|
||||
<div class="example">
|
||||
<pre class="brush:xml; gutter:false; light:true;"> </script>
|
||||
</head>
|
||||
<body onload="spring();">
|
||||
<div id="graph" style="width:700px; height:500px;">
|
||||
<canvas id="_ctx" width="700" height="500"></canvas>
|
||||
</div>
|
||||
</body>
|
||||
</html> </pre></div>
|
||||
<p> </p>
|
||||
<hr />
|
||||
<h2>See also</h2>
|
||||
<ul>
|
||||
<li><a href="http://gephi.org/" target="_blank">Gephi</a> (GPL): ne<span>twork analysis & visualization GUI.</span></li>
|
||||
<li><a href="http://networkx.lanl.gov/" target="_blank">NetworkX</a> (BSD): <span>network analysis toolkit for Python + NumPy.</span></li>
|
||||
<li><a href="http://www.cityinabottle.org/nodebox/" target="_blank">NodeBox</a> (BSD): g<span>raphics toolkit for Python + OpenGL.</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,613 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>pattern-it</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<link type="text/css" rel="stylesheet" href="../clips.css" />
|
||||
<style>
|
||||
/* Small fixes because we omit the online layout.css. */
|
||||
h3 { line-height: 1.3em; }
|
||||
#page { margin-left: auto; margin-right: auto; }
|
||||
#header, #header-inner { height: 175px; }
|
||||
#header { border-bottom: 1px solid #C6D4DD; }
|
||||
table { border-collapse: collapse; }
|
||||
#checksum { display: none; }
|
||||
</style>
|
||||
<link href="../js/shCore.css" rel="stylesheet" type="text/css" />
|
||||
<link href="../js/shThemeDefault.css" rel="stylesheet" type="text/css" />
|
||||
<script language="javascript" src="../js/shCore.js"></script>
|
||||
<script language="javascript" src="../js/shBrushXml.js"></script>
|
||||
<script language="javascript" src="../js/shBrushJScript.js"></script>
|
||||
<script language="javascript" src="../js/shBrushPython.js"></script>
|
||||
</head>
|
||||
<body class="node-type-page one-sidebar sidebar-right section-pages">
|
||||
<div id="page">
|
||||
<div id="page-inner">
|
||||
<div id="header"><div id="header-inner"></div></div>
|
||||
<div id="content">
|
||||
<div id="content-inner">
|
||||
<div class="node node-type-page">
|
||||
<div class="node-inner">
|
||||
<div class="breadcrumb">View online at: <a href="http://www.clips.ua.ac.be/pages/pattern-it" class="noexternal" target="_blank">http://www.clips.ua.ac.be/pages/pattern-it</a></div>
|
||||
<h1>pattern.it</h1>
|
||||
<!-- Parsed from the online documentation. -->
|
||||
<div id="node-1698" class="node node-type-page"><div class="node-inner">
|
||||
<div class="content">
|
||||
<p><span class="big">The pattern.it module contains a fast part-of-speech tagger for Italian (identifies nouns, adjectives, verbs, etc. in a sentence) and tools for Italian verb conjugation and noun singularization & pluralization.</span></p>
|
||||
<p>It can be used by itself or with other <a href="pattern.html">pattern</a> modules: <a href="pattern-web.html">web</a> | <a href="pattern-db.html">db</a> | <a href="pattern-en.html">en</a> | <a href="pattern-search.html">search</a> | <a href="pattern-vector.html">vector</a> | <a href="pattern-graph.html">graph</a>.</p>
|
||||
<p><img src="../g/pattern_schema_it.gif" alt="" /></p>
|
||||
<hr />
|
||||
<h2>Documentation</h2>
|
||||
<p>The functions in this module take the same parameters and return the same values as their counterparts in <a href="pattern-en.html">pattern.en</a>. Refer to the documentation there for more details. </p>
|
||||
<h3>Gender</h3>
|
||||
<p>Italian nouns and adjectives inflect according to gender. The <span class="inline_code">gender()</span> function predicts the gender (<span class="inline_code">MALE</span>, <span class="inline_code">FEMALE</span>, <span class="inline_code">PLURAL</span>) of a given noun with about 92% accuracy: </p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> from pattern.it import gender, MALE, FEMALE, PLURAL
|
||||
>>> print gender('gatti')
|
||||
|
||||
(MALE, PLURAL)</pre></div>
|
||||
<h3>Article</h3>
|
||||
<p>The <span class="inline_code">article()</span> function returns the article (<span class="inline_code">INDEFINITE</span> or <span class="inline_code">DEFINITE</span>) inflected by gender (e.g., <em><span style="text-decoration: underline;">il</span> gatto</em> → <em><span style="text-decoration: underline;">i</span> gatti</em>).</p>
|
||||
<div class="example">
|
||||
<pre class="brush: python;gutter: false; light: true; fontsize: 100; first-line: 1; ">>>> from pattern.it import article, DEFINITE, MALE, PLURAL
|
||||
>>> print article('gatti', DEFINITE, gender=(MALE, PLURAL))
|
||||
|
||||
i</pre></div>
|
||||
<h3>Noun singularization & pluralization</h3>
|
||||
<p>For Italian nouns there is <span class="inline_code">singularize()</span> and <span class="inline_code">pluralize()</span>. The implementation is slightly less robust than the English version (accuracy 84% for singularization and 93% for pluralization).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import singularize, pluralize
|
||||
>>>
|
||||
>>> print singularize('gatti')
|
||||
>>> print pluralize('gatto')
|
||||
|
||||
gatto
|
||||
gatti </pre></div>
|
||||
<h3>Verb conjugation</h3>
|
||||
<p>For Italian verbs there is <span class="inline_code">conjugate()</span>, <span class="inline_code">lemma()</span>, <span class="inline_code">lexeme()</span> and <span class="inline_code">tenses()</span>. The lexicon for verb conjugation contains about 1,250 common Italian verbs, mined from Wiktionary. For unknown verbs it will fall back to a rule-based approach with an accuracy of about 86%. </p>
|
||||
<p>Italian verbs have more tenses than English verbs. In particular, the plural differs for each person, and there are additional forms for the <span class="inline_code">FUTURE</span> tense, the <span class="inline_code">IMPERATIVE</span>, <span class="inline_code">CONDITIONAL</span> and <span class="inline_code">SUBJUNCTIVE</span> mood and the <span class="inline_code">PERFECTIVE</span> aspect:</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import conjugate
|
||||
>>> from pattern.it import INFINITIVE, PRESENT, PAST, SG, SUBJUNCTIVE, PERFECTIVE
|
||||
>>>
|
||||
>>> print conjugate('sono', INFINITIVE)
|
||||
>>> print conjugate('sono', PRESENT, 1, SG, mood=SUBJUNCTIVE)
|
||||
>>> print conjugate('sono', PAST, 3, SG)
|
||||
>>> print conjugate('sono', PAST, 3, SG, aspect=PERFECTIVE)
|
||||
|
||||
essere
|
||||
sia
|
||||
era
|
||||
fu </pre></div>
|
||||
<p>For <span class="inline_code">PAST</span> tense + <span class="inline_code">PERFECTIVE</span> aspect we can also use <span class="inline_code">PRETERITE</span> (<em>passato remoto</em>). For <span class="inline_code">PAST</span> tense + <span class="inline_code">IMPERFECTIVE</span> aspect we can also use <span class="inline_code">IMPERFECT</span> (<em>imperfetto</em>).</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import conjugate
|
||||
>>> from pattern.it import IMPERFECT, PRETERITE
|
||||
>>>
|
||||
>>> print conjugate('sono', IMPERFECT, 3, SG)
|
||||
>>> print conjugate('sono', PRETERITE, 3, SG)
|
||||
|
||||
era
|
||||
fu </pre></div>
|
||||
<p> The <span class="inline_code">conjugate()</span> function takes the following optional parameters:</p>
|
||||
<table class="border">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="smallcaps">Tense</td>
|
||||
<td class="smallcaps">Person</td>
|
||||
<td class="smallcaps">Number</td>
|
||||
<td class="smallcaps">Mood</td>
|
||||
<td class="smallcaps">Aspect</td>
|
||||
<td class="smallcaps">Alias</td>
|
||||
<td class="smallcaps">Example</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">INFINITIVE</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">"inf"</td>
|
||||
<td><em>essere</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sono</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sei</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">è</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">siamo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">siete</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">sono</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"part"</td>
|
||||
<td><em>essendo</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg!"</td>
|
||||
<td><em>sii</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg!"</td>
|
||||
<td><em>sia</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl!"</td>
|
||||
<td><em>siamo</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl!"</td>
|
||||
<td><em>siate</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">IMPERATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl!"</td>
|
||||
<td><em>siano</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg?"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sia</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sia</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg?"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">sia</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl?"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">siamo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl?"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">siate</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PRESENT</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl?"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">siano</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">ero</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">eri</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">era</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">e</span><span style="text-decoration: underline;">ravamo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">eravate</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">erano</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">None</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PROGRESSIVE</td>
|
||||
<td class="inline_code">"ppart"</td>
|
||||
<td><em>stato</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp+"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">fui</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp+"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fosti</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp+"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">fu</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl+"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">fummo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl+"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">foste</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">PERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl+"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">furono</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgp?"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">fossi</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgp?"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">fossi</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgp?"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">fosse</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1ppl?"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">fossimo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2ppl?"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">foste</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">PAST</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">SUBJUNCTIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3ppl?"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">fossero</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sgf"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sarò</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sgf"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">sarai</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sgf"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">sarà</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1plf"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">saremo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2plf"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">sarete</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">FUTURE</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3plf"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">saranno</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="border-left: 0; border-right: 0; padding: 0;"> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1sg->"</td>
|
||||
<td><em>io <span style="text-decoration: underline;">sarei</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2sg->"</td>
|
||||
<td><em>tu <span style="text-decoration: underline;">saresti</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">SG</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3sg->"</td>
|
||||
<td><em>lui <span style="text-decoration: underline;">sarebbe</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">1</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"1pl->"</td>
|
||||
<td><em>noi <span style="text-decoration: underline;">saremmo</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">2</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"2pl->"</td>
|
||||
<td><em>voi <span style="text-decoration: underline;">sareste</span></em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="inline_code">CONDITIONAL</td>
|
||||
<td class="inline_code">3</td>
|
||||
<td class="inline_code">PL</td>
|
||||
<td class="inline_code">INDICATIVE</td>
|
||||
<td class="inline_code">IMPERFECTIVE</td>
|
||||
<td class="inline_code">"3pl->"</td>
|
||||
<td><em>loro <span style="text-decoration: underline;">sarebbero</span></em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<p>Instead of optional parameters, a single short alias, or <span class="inline_code">PARTICIPLE</span> or <span class="inline_code">PAST+PARTICIPLE</span> can also be given. With no parameters, the infinitive form of the verb is returned.</p>
|
||||
<h3>Attributive & predicative adjectives </h3>
|
||||
<p>Italian adjectives inflect with suffixes <span class="inline_code">-o</span> → <span class="inline_code">-i</span> (masculine) and <span class="inline_code">-a</span> → <span class="inline_code">-e</span> (feminine), with some exceptions (e.g., <em>grande</em> → <em>i grandi felini</em>). You can get the base form with the <span class="inline_code">predicative()</span> function. A statistical approach is used with an accuracy of 88%.</p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import predicative
|
||||
>>> print predicative('grandi')
|
||||
|
||||
grande </pre></div>
|
||||
<h3>Parser</h3>
|
||||
<p>For parsing there is <span class="inline_code" style="font-family: Courier, monospace; font-size: 12px;">parse()</span>, <span class="inline_code">parsetree()</span> and <span class="inline_code" style="font-family: Courier, monospace; font-size: 12px;">split()</span>. The <span class="inline_code">parse()</span> function annotates words in the given string with their part-of-speech <a class="link-maintenance" href="mbsp-tags.html">tags</a> (e.g., <span class="postag">NN</span> for nouns and <span class="postag">VB</span> for verbs). The <span class="inline_code">parsetree()</span> function takes a string and returns a tree of nested objects (<span class="inline_code">Text</span> → <span class="inline_code">Sentence</span> → <span class="inline_code">Chunk</span> → <span class="inline_code">Word</span>). The <span class="inline_code">split()</span> function takes the output of <span class="inline_code">parse()</span> and returns a <span class="inline_code">Text</span>. See the <span class="inline_code">pattern.en</span> documentation (<span class="link-maintenance" style="color: #78aaff;"><a style="color: #8caaff; outline-style: none !important; outline-width: initial !important; outline-color: initial !important;" href="pattern-en.html#tree">here</a></span>) for how to manipulate <span class="inline_code">Text</span> objects. </p>
|
||||
<div class="example">
|
||||
<pre class="brush:python; gutter:false; light:true;">>>> from pattern.it import parse, split
|
||||
>>>
|
||||
>>> s = parse('Il gatto nero faceva le fusa.')
|
||||
>>> for sentence in split(s):
|
||||
>>> print sentence
|
||||
|
||||
Sentence('Il/DT/B-NP/O gatto/NN/I-NP/O nero/JJ/I-NP/O'
|
||||
'faceva/VB/B-VP/O'
|
||||
'le/DT/B-NP/O fusa/NN/I-NP/O ././O/O')
|
||||
</pre></div>
|
||||
<p>The parser is mined from Wiktionary. The accuracy is around 92%.</p>
|
||||
<h3>Sentiment analysis</h3>
|
||||
<p>There's no <span class="inline_code">sentiment()</span> function for Italian yet.</p>
|
||||
</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
SyntaxHighlighter.all();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|