You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
148 lines
4.1 KiB
Python
148 lines
4.1 KiB
Python
#### PATTERN | XX | INFLECT ########################################################################
|
|
# -*- coding: utf-8 -*-
|
|
# Copyright (c)
|
|
# Author:
|
|
# License:
|
|
# http://www.clips.ua.ac.be/pages/pattern
|
|
|
|
####################################################################################################
|
|
# Template for pattern.xx.inflect with functions for word inflection in language XXXXX.
|
|
# inflection is the modification of a word to express different grammatical categories,
|
|
# such as tense, mood, voice, aspect, person, number, gender and case.
|
|
# Conjugation is the inflection of verbs.
|
|
# To construct a lemmatizer for pattern.xx.parser.find_lemmata(),
|
|
# we need functions for noun singularization, verb infinitives, predicate adjectives, etc.
|
|
|
|
from __future__ import unicode_literals
|
|
from __future__ import division
|
|
|
|
from builtins import str, bytes, dict, int
|
|
from builtins import map, zip, filter
|
|
from builtins import object, range
|
|
|
|
import os
|
|
import sys
|
|
import re
|
|
|
|
try:
|
|
MODULE = os.path.dirname(os.path.realpath(__file__))
|
|
except:
|
|
MODULE = ""
|
|
|
|
sys.path.insert(0, os.path.join(MODULE, "..", "..", "..", ".."))
|
|
|
|
# Import Verbs base class and verb tenses.
|
|
from pattern.text import Verbs as _Verbs
|
|
from pattern.text import (
|
|
INFINITIVE, PRESENT, PAST, FUTURE,
|
|
FIRST, SECOND, THIRD,
|
|
SINGULAR, PLURAL, SG, PL,
|
|
PROGRESSIVE,
|
|
PARTICIPLE
|
|
)
|
|
|
|
sys.path.pop(0)
|
|
|
|
VERB, NOUN, ADJECTIVE, ADVERB = "VB", "NN", "JJ", "RB"
|
|
|
|
VOWELS = "aeiouy"
|
|
re_vowel = re.compile(r"a|e|i|o|u|y", re.I)
|
|
is_vowel = lambda ch: ch in VOWELS
|
|
|
|
#### ARTICLE #######################################################################################
|
|
|
|
# Inflection gender.
|
|
MASCULINE, FEMININE, NEUTER, PLURAL = \
|
|
MALE, FEMALE, NEUTRAL, PLURAL = \
|
|
M, F, N, PL = "m", "f", "n", "p"
|
|
|
|
|
|
def definite_article(word):
|
|
""" Returns the definite article for a given word.
|
|
"""
|
|
return "the"
|
|
|
|
|
|
def indefinite_article(word):
|
|
""" Returns the indefinite article for a given word.
|
|
"""
|
|
return "a"
|
|
|
|
DEFINITE, INDEFINITE = \
|
|
"definite", "indefinite"
|
|
|
|
|
|
def article(word, function=INDEFINITE):
|
|
""" Returns the indefinite or definite article for the given word.
|
|
"""
|
|
return function == DEFINITE \
|
|
and definite_article(word) \
|
|
or indefinite_article(word)
|
|
|
|
_article = article
|
|
|
|
|
|
def referenced(word, article=INDEFINITE):
|
|
""" Returns a string with the article + the word.
|
|
"""
|
|
return "%s %s" % (_article(word, article), word)
|
|
|
|
#### PLURALIZE ######################################################################################
|
|
|
|
|
|
def pluralize(word, pos=NOUN, custom={}):
|
|
""" Returns the plural of a given word.
|
|
"""
|
|
return word + "s"
|
|
|
|
#### SINGULARIZE ###################################################################################
|
|
|
|
|
|
def singularize(word, pos=NOUN, custom={}):
|
|
""" Returns the singular of a given word.
|
|
"""
|
|
return word.rstrip("s")
|
|
|
|
#### VERB CONJUGATION ##############################################################################
|
|
# The verb table was trained on CELEX and contains the top 2000 most frequent verbs.
|
|
|
|
|
|
class Verbs(_Verbs):
|
|
|
|
def __init__(self):
|
|
_Verbs.__init__(self, os.path.join(MODULE, "xx-verbs.txt"),
|
|
language = "xx",
|
|
# The order of tenses in the given file; see pattern.text.__init__.py => Verbs.
|
|
format = [0, 1, 2, 3, 7, 8, 17, 18, 19, 23, 25, 24, 16, 9, 10, 11, 15, 33, 26, 27, 28, 32],
|
|
default = {}
|
|
)
|
|
|
|
def find_lemma(self, verb):
|
|
""" Returns the base form of the given inflected verb, using a rule-based approach.
|
|
"""
|
|
return verb
|
|
|
|
def find_lexeme(self, verb):
|
|
""" For a regular verb (base form), returns the forms using a rule-based approach.
|
|
"""
|
|
return []
|
|
|
|
verbs = Verbs()
|
|
|
|
conjugate, lemma, lexeme, tenses = \
|
|
verbs.conjugate, verbs.lemma, verbs.lexeme, verbs.tenses
|
|
|
|
#### ATTRIBUTIVE & PREDICATIVE #####################################################################
|
|
|
|
|
|
def attributive(adjective):
|
|
""" For a predicative adjective, returns the attributive form.
|
|
"""
|
|
return adjective
|
|
|
|
|
|
def predicative(adjective):
|
|
""" Returns the predicative adjective.
|
|
"""
|
|
return adjective
|