# ai-content-maker/.venv/Lib/site-packages/tests/test_pos.py

#!/usr/bin/env python3
"""Tests for PartOfSpeechTagger class"""
import copy
import unittest

from gruut.pos import PartOfSpeechTagger


class PartOfSpeechTaggerTestCase(unittest.TestCase):
    """Unit tests for the static helpers exposed by PartOfSpeechTagger"""

    def test_encode_decode(self):
        """Encoding a string and then decoding it must give back the original"""
        original = "ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn"
        encoded = PartOfSpeechTagger.encode_string(original)
        round_tripped = PartOfSpeechTagger.decode_string(encoded)

        self.assertEqual(round_tripped, original)

    def test_features(self):
        """Check sent2features output for a 3-token sentence with 1 word of context"""
        sentence = "1 test .".split()

        # Expected per-word features when no context is considered:
        # (word, length, is punctuation, is digit, first 2 chars, last 2 chars)
        feature_rows = [
            ("1", 1, False, True, "1", "1"),
            ("test", 4, False, False, "te", "st"),
            (".", 1, True, False, ".", "."),
        ]
        word_features = {
            word: {
                "bias": 1.0,
                "word": word,
                "len(word)": length,
                "word.ispunctuation": is_punctuation,
                "word.isdigit()": is_digit,
                "word[:2]": front,
                "word[-2:]": back,
            }
            for word, length, is_punctuation, is_digit, front, back in feature_rows
        }

        # Layer the neighbours' features on top of each word's own features.
        # A word with no left neighbour is flagged BOS; one with no right
        # neighbour is flagged EOS.
        expected_features = []
        last_index = len(sentence) - 1
        for index, word in enumerate(sentence):
            features = copy.deepcopy(word_features[word])

            if index < last_index:
                following = word_features[sentence[index + 1]]
                for key, value in following.items():
                    features["+1:" + key] = value
            else:
                features["EOS"] = True

            if index > 0:
                preceding = word_features[sentence[index - 1]]
                for key, value in preceding.items():
                    features["-1:" + key] = value
            else:
                features["BOS"] = True

            expected_features.append(features)

        actual_features = PartOfSpeechTagger.sent2features(
            sentence,
            words_forward=1,
            words_backward=1,
            chars_front=2,
            chars_back=2,
            encode=False,
        )

        self.assertEqual(expected_features, actual_features)


# -----------------------------------------------------------------------------

# When this file is executed directly (rather than imported), hand control to
# unittest's command-line runner so the test cases in this module are run.
if __name__ == "__main__":
    unittest.main()