ai-content-maker/.venv/Lib/site-packages/tests/test_pos.py

90 lines
2.6 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
#!/usr/bin/env python3
"""Tests for PartOfSpeechTagger class"""
import copy
import unittest
from gruut.pos import PartOfSpeechTagger
class PartOfSpeechTaggerTestCase(unittest.TestCase):
    """Unit tests for the helper functions exposed by PartOfSpeechTagger."""

    def test_encode_decode(self):
        """Round-trip a string through the pycrfsuite feature encode/decode helpers."""
        # Sample text made up largely of non-ASCII characters.
        original = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn"

        encoded = PartOfSpeechTagger.encode_string(original)
        restored = PartOfSpeechTagger.decode_string(encoded)

        self.assertEqual(restored, original)

    def test_features(self):
        """Compare sent2features output with hand-built per-token features."""
        sentence = "1 test .".split()

        # Features each token is expected to receive from the token itself,
        # before any neighbouring-word context is mixed in.
        own_features = {
            "1": {
                "bias": 1.0,
                "word": "1",
                "len(word)": 1,
                "word.isdigit()": True,
                "word.ispunctuation": False,
                "word[:2]": "1",
                "word[-2:]": "1",
            },
            "test": {
                "bias": 1.0,
                "word": "test",
                "len(word)": 4,
                "word.isdigit()": False,
                "word.ispunctuation": False,
                "word[:2]": "te",
                "word[-2:]": "st",
            },
            ".": {
                "bias": 1.0,
                "word": ".",
                "len(word)": 1,
                "word.isdigit()": False,
                "word.ispunctuation": True,
                "word[:2]": ".",
                "word[-2:]": ".",
            },
        }

        # (token, neighbour, prefix): the neighbour's own features are expected
        # to appear on the token under keys carrying the given prefix.
        context_links = (
            ("1", "test", "+1:"),
            ("test", "1", "-1:"),
            ("test", ".", "+1:"),
            (".", "test", "-1:"),
        )

        # Work on a deep copy so that adding context never alters own_features
        # while it is still being read from.
        expected_by_token = copy.deepcopy(own_features)
        for token, neighbour, prefix in context_links:
            for key, value in own_features[neighbour].items():
                expected_by_token[token][prefix + key] = value

        # The first token carries the BOS flag and the last one the EOS flag.
        expected_by_token["1"]["BOS"] = True
        expected_by_token["."]["EOS"] = True

        expected_features = [expected_by_token[token] for token in sentence]

        # One word of context in each direction, two characters of
        # prefix/suffix, and no encoding of the feature strings.
        actual_features = PartOfSpeechTagger.sent2features(
            sentence,
            words_forward=1,
            words_backward=1,
            chars_front=2,
            chars_back=2,
            encode=False,
        )

        self.assertEqual(expected_features, actual_features)
# -----------------------------------------------------------------------------
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()