"""
Unit tests for Senna

Exercises the Senna pipeline wrapper from ``nltk.classify`` and the
POS, chunk and NER taggers from ``nltk.tag``.  Every test case is skipped
when the Senna installation path does not exist on this machine.
"""

import unittest
from os import environ, path, sep

from nltk.classify import Senna
from nltk.tag import SennaChunkTagger, SennaNERTagger, SennaTagger

# Where to look for Senna: the SENNA environment variable wins when it is
# set (normalized, with a trailing separator appended); otherwise fall back
# to a fixed default location.
SENNA_EXECUTABLE_PATH = (
    path.normpath(environ["SENNA"]) + sep
    if "SENNA" in environ
    else "/usr/share/senna-v3.0"
)

# Only the existence of the path is checked, not that it holds a runnable
# binary.
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)

# Shared skip guard applied to every test case in this module.
_requires_senna = unittest.skipUnless(senna_is_installed, "Requires Senna executable")


@_requires_senna
class TestSennaPipeline(unittest.TestCase):
    """Unittest for nltk.classify.senna"""

    def test_senna_pipeline(self):
        """Senna pipeline interface"""
        fields = ("word", "chk", "ner", "pos")
        senna = Senna(SENNA_EXECUTABLE_PATH, ["pos", "chk", "ner"])
        words = "Dusseldorf is an international business center".split()

        # Project each annotated token onto a (word, chk, ner, pos) tuple so
        # it can be compared against plain literals.
        observed = [
            tuple(annotation[field] for field in fields)
            for annotation in senna.tag(words)
        ]

        self.assertEqual(
            observed,
            [
                ("Dusseldorf", "B-NP", "B-LOC", "NNP"),
                ("is", "B-VP", "O", "VBZ"),
                ("an", "B-NP", "O", "DT"),
                ("international", "I-NP", "O", "JJ"),
                ("business", "I-NP", "O", "NN"),
                ("center", "I-NP", "O", "NN"),
            ],
        )


@_requires_senna
class TestSennaTagger(unittest.TestCase):
    """Unittest for nltk.tag.senna"""

    # Sentence shared by the POS and chunk tagger tests; it is split afresh
    # in each test so no token list is shared between them.
    _SWALLOW_SENTENCE = "What is the airspeed of an unladen swallow ?"

    def test_senna_tagger(self):
        """POS tagger returns the expected (word, tag) pairs."""
        pos_tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
        tagged = pos_tagger.tag(self._SWALLOW_SENTENCE.split())

        self.assertEqual(
            tagged,
            [
                ("What", "WP"),
                ("is", "VBZ"),
                ("the", "DT"),
                ("airspeed", "NN"),
                ("of", "IN"),
                ("an", "DT"),
                ("unladen", "NN"),
                ("swallow", "NN"),
                ("?", "."),
            ],
        )

    def test_senna_chunk_tagger(self):
        """Chunk tagger returns BIO tags, and bio_to_chunks groups the NPs."""
        chunker = SennaChunkTagger(SENNA_EXECUTABLE_PATH)

        bio_tags = chunker.tag(self._SWALLOW_SENTENCE.split())
        expected_bio_tags = [
            ("What", "B-NP"),
            ("is", "B-VP"),
            ("the", "B-NP"),
            ("airspeed", "I-NP"),
            ("of", "B-PP"),
            ("an", "B-NP"),
            ("unladen", "I-NP"),
            ("swallow", "I-NP"),
            ("?", "O"),
        ]

        # Each expected chunk pairs the joined words with a dash-joined
        # string of token positions.
        noun_phrases = list(chunker.bio_to_chunks(bio_tags, chunk_type="NP"))
        expected_noun_phrases = [
            ("What", "0"),
            ("the airspeed", "2-3"),
            ("an unladen swallow", "5-6-7"),
        ]

        self.assertEqual(bio_tags, expected_bio_tags)
        self.assertEqual(noun_phrases, expected_noun_phrases)

    def test_senna_ner_tagger(self):
        """NER tagger returns the expected entity tags for two sentences."""
        ner = SennaNERTagger(SENNA_EXECUTABLE_PATH)

        london_tags = ner.tag("Shakespeare theatre was in London .".split())
        expected_london_tags = [
            ("Shakespeare", "B-PER"),
            ("theatre", "O"),
            ("was", "O"),
            ("in", "O"),
            ("London", "B-LOC"),
            (".", "O"),
        ]

        un_tags = ner.tag("UN headquarters are in NY , USA .".split())
        expected_un_tags = [
            ("UN", "B-ORG"),
            ("headquarters", "O"),
            ("are", "O"),
            ("in", "O"),
            ("NY", "B-LOC"),
            (",", "O"),
            ("USA", "B-LOC"),
            (".", "O"),
        ]

        self.assertEqual(london_tags, expected_london_tags)
        self.assertEqual(un_tags, expected_un_tags)