90 lines
2.6 KiB
Python
90 lines
2.6 KiB
Python
#!/usr/bin/env python3
|
||
"""Tests for PartOfSpeechTagger class"""
|
||
import copy
|
||
import unittest
|
||
|
||
from gruut.pos import PartOfSpeechTagger
|
||
|
||
|
||
class PartOfSpeechTaggerTestCase(unittest.TestCase):
    """Unit tests for the static helpers exposed by PartOfSpeechTagger"""

    def test_encode_decode(self):
        """A string must survive an encode_string/decode_string round trip"""
        original = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn"

        encoded = PartOfSpeechTagger.encode_string(original)
        round_tripped = PartOfSpeechTagger.decode_string(encoded)

        self.assertEqual(round_tripped, original)

    def test_features(self):
        """sent2features must emit per-word features plus one word of context"""
        tokens = "1 test .".split()

        # Features each word contributes on its own, before any context
        # from neighbouring words is merged in.
        own_features = {
            "1": {
                "bias": 1.0,
                "word": "1",
                "len(word)": 1,
                "word[:2]": "1",
                "word[-2:]": "1",
                "word.ispunctuation": False,
                "word.isdigit()": True,
            },
            "test": {
                "bias": 1.0,
                "word": "test",
                "len(word)": 4,
                "word[:2]": "te",
                "word[-2:]": "st",
                "word.ispunctuation": False,
                "word.isdigit()": False,
            },
            ".": {
                "bias": 1.0,
                "word": ".",
                "len(word)": 1,
                "word[:2]": ".",
                "word[-2:]": ".",
                "word.ispunctuation": True,
                "word.isdigit()": False,
            },
        }

        def with_prefix(features, prefix):
            """Return a copy of features with prefix prepended to every key"""
            prefixed = {}
            for key, value in features.items():
                prefixed[f"{prefix}{key}"] = value
            return prefixed

        # Build the expected output word by word: each word keeps its own
        # features, gains its neighbours' features under "-1:" / "+1:", and
        # the first / last word is flagged with BOS / EOS respectively.
        final_index = len(tokens) - 1
        expected_features = []
        for index, token in enumerate(tokens):
            merged = copy.deepcopy(own_features[token])

            if index == 0:
                merged["BOS"] = True
            else:
                previous = own_features[tokens[index - 1]]
                merged.update(with_prefix(previous, "-1:"))

            if index == final_index:
                merged["EOS"] = True
            else:
                following = own_features[tokens[index + 1]]
                merged.update(with_prefix(following, "+1:"))

            expected_features.append(merged)

        extraction_options = {
            "words_forward": 1,
            "words_backward": 1,
            "chars_front": 2,
            "chars_back": 2,
            "encode": False,
        }
        actual_features = PartOfSpeechTagger.sent2features(
            tokens, **extraction_options
        )

        self.assertEqual(expected_features, actual_features)
|
||
|
||
|
||
# -----------------------------------------------------------------------------
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|