You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

38 lines
1.0 KiB
Python

# -*- coding: utf-8 -*-
"""
Tests for Brill tagger.
"""
import unittest
from nltk.tag import UnigramTagger, brill, brill_trainer
from nltk.tbl import Template
from nltk.corpus import treebank
from nltk.tbl import demo
class TestBrill(unittest.TestCase):
    """Test cases exercising Brill tagger training and the ``nltk.tbl`` demo."""

    def test_pos_template(self):
        # Train on the first 1000 tagged treebank sentences: a unigram tagger
        # serves as the initial tagger, and the Brill trainer is given a
        # single template built from the ``Pos([-1])`` feature.
        training_sample = treebank.tagged_sents()[:1000]
        baseline_tagger = UnigramTagger(training_sample)
        templates = [brill.Template(brill.Pos([-1]))]
        trainer = brill_trainer.BrillTaggerTrainer(baseline_tagger, templates)
        brill_tagger = trainer.train(training_sample)

        # Example from https://github.com/nltk/nltk/issues/769
        tokens = 'This is a foo bar sentence'.split()
        # One expected tag per token, in order; 'foo' and 'sentence' are
        # expected to come back with a tag of None.
        expected_tags = ['DT', 'VBZ', 'DT', None, 'NN', None]
        expected = list(zip(tokens, expected_tags))

        self.assertEqual(brill_tagger.tag(tokens), expected)

    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
    def test_brill_demo(self):
        # Unconditionally skipped (see the decorator's reason); if the skip is
        # ever lifted this simply invokes the imported ``demo`` callable.
        demo()