You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

116 lines
3.6 KiB
Python

# -*- coding: utf-8 -*-
"""
Unit tests for Senna
"""
from os import environ, path, sep
import logging
import unittest
from nltk.classify import Senna
from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger
# Resolve the Senna location used by these tests: the SENNA environment
# variable takes precedence; otherwise fall back to a hard-coded default.
if 'SENNA' in environ:
    # Normalise the user-supplied path and terminate it with a separator.
    SENNA_EXECUTABLE_PATH = path.normpath(environ['SENNA']) + sep
else:
    SENNA_EXECUTABLE_PATH = '/usr/share/senna-v3.0'

# Used by the skipUnless decorators so the suite is skipped (not failed)
# when the resolved path does not exist on this machine.
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaPipeline(unittest.TestCase):
    """Unittest for nltk.classify.senna"""

    def test_senna_pipeline(self):
        """Senna pipeline interface

        Runs a ``Senna`` pipeline configured with the 'pos', 'chk' and 'ner'
        operations over one tokenized sentence and compares the per-token
        (word, chk, ner, pos) values against the expected annotations.
        """
        pipeline = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
        sent = 'Dusseldorf is an international business center'.split()

        # Flatten each tagged token (a mapping keyed by 'word' and by the
        # requested operation names) into a tuple for direct comparison.
        result = [
            (token['word'], token['chk'], token['ner'], token['pos'])
            for token in pipeline.tag(sent)
        ]
        expected = [
            ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
            ('is', 'B-VP', 'O', 'VBZ'),
            ('an', 'B-NP', 'O', 'DT'),
            ('international', 'I-NP', 'O', 'JJ'),
            ('business', 'I-NP', 'O', 'NN'),
            ('center', 'I-NP', 'O', 'NN'),
        ]
        self.assertEqual(result, expected)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaTagger(unittest.TestCase):
    """Unittest for nltk.tag.senna"""

    def test_senna_tagger(self):
        """SennaTagger.tag returns the expected (token, tag) pairs."""
        tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
        result = tagger.tag('What is the airspeed of an unladen swallow ?'.split())
        expected = [
            ('What', 'WP'),
            ('is', 'VBZ'),
            ('the', 'DT'),
            ('airspeed', 'NN'),
            ('of', 'IN'),
            ('an', 'DT'),
            ('unladen', 'NN'),
            ('swallow', 'NN'),
            ('?', '.'),
        ]
        self.assertEqual(result, expected)

    def test_senna_chunk_tagger(self):
        """SennaChunkTagger.tag yields BIO chunk tags; bio_to_chunks groups them.

        The second check extracts only 'NP' chunks and expects each one as a
        (phrase, '-'-joined token positions) pair.
        """
        chktagger = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
        result_1 = chktagger.tag('What is the airspeed of an unladen swallow ?'.split())
        expected_1 = [
            ('What', 'B-NP'),
            ('is', 'B-VP'),
            ('the', 'B-NP'),
            ('airspeed', 'I-NP'),
            ('of', 'B-PP'),
            ('an', 'B-NP'),
            ('unladen', 'I-NP'),
            ('swallow', 'I-NP'),
            ('?', 'O'),
        ]
        # Verify the raw tagging before deriving chunks from it, so a tagging
        # regression is reported as such rather than as a chunking failure.
        self.assertEqual(result_1, expected_1)

        result_2 = list(chktagger.bio_to_chunks(result_1, chunk_type='NP'))
        expected_2 = [
            ('What', '0'),
            ('the airspeed', '2-3'),
            ('an unladen swallow', '5-6-7'),
        ]
        self.assertEqual(result_2, expected_2)

    def test_senna_ner_tagger(self):
        """SennaNERTagger.tag returns the expected (token, tag) pairs for two sentences."""
        nertagger = SennaNERTagger(SENNA_EXECUTABLE_PATH)

        result_1 = nertagger.tag('Shakespeare theatre was in London .'.split())
        expected_1 = [
            ('Shakespeare', 'B-PER'),
            ('theatre', 'O'),
            ('was', 'O'),
            ('in', 'O'),
            ('London', 'B-LOC'),
            ('.', 'O'),
        ]

        result_2 = nertagger.tag('UN headquarters are in NY , USA .'.split())
        expected_2 = [
            ('UN', 'B-ORG'),
            ('headquarters', 'O'),
            ('are', 'O'),
            ('in', 'O'),
            ('NY', 'B-LOC'),
            (',', 'O'),
            ('USA', 'B-LOC'),
            ('.', 'O'),
        ]
        self.assertEqual(result_1, expected_1)
        self.assertEqual(result_2, expected_2)