ai-content-maker/.venv/Lib/site-packages/tests/test_en.py

159 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Tests for English class"""
import unittest
from gruut import sentences
class EnglishTestCase(unittest.TestCase):
    """Test cases for English.

    Every test passes one input string to ``sentences`` with
    ``lang="en_US"``, takes the first sentence it yields, and compares the
    ``text`` attribute of each word in that sentence with an expected list
    of tokens.
    """

    def test_unclean_text(self):
        """Test text with lots of noise"""
        # NOTE(review): the input below starts with "ITS" while the first
        # expected token is "IT'S". An apostrophe-like (possibly non-ASCII)
        # character may have been lost from the input literal in
        # transcription -- TODO confirm against the upstream test file.
        text = (
            "ITS <a> 'test' (seNtEnce) for-only $100, Dr., & [I] ## *like* ## it 100%!"
        )
        sentence = next(sentences(text, lang="en_US"))

        # Expected: brackets/quotes kept as separate tokens, "$100" and
        # "100%" verbalized, "Dr." and "&" expanded, "#" runs absent.
        self.assertEqual(
            [
                "IT'S",
                "<",
                "a",
                ">",
                "'",
                "test",
                "'",
                "(",
                "seNtEnce",
                ")",
                "for",
                "only",
                "one",
                "hundred",
                "dollars",
                ",",
                "Doctor",
                ",",
                "and",
                "[",
                "I",
                "]",
                "*",
                "like",
                "*",
                "it",
                "one",
                "hundred",
                "percent",
                "!",
            ],
            [word.text for word in sentence],
        )

    def test_spell_out(self):
        """Test spell-out in say-as SSML tag"""
        text = '<say-as interpret-as="spell-out">abc@1+2-3*.*</say-as>'
        # ssml=True so the <say-as> element is parsed as markup, not text.
        sentence = next(sentences(text, lang="en_US", ssml=True))

        # Expected: one token per character, with digits and symbols named.
        self.assertEqual(
            [
                "a",
                "b",
                "c",
                "at",
                "one",
                "plus",
                "two",
                "dash",
                "three",
                "star",
                "dot",
                "star",
            ],
            [word.text for word in sentence],
        )

    def test_initialisms(self):
        """Test expansion of initialisms"""
        text = "ABC abc A.B.C."
        sentence = next(sentences(text, lang="en_US"))

        # Expected: upper-case and dotted forms split into letters, while the
        # lower-case "abc" stays a single token.
        self.assertEqual(
            ["A", "B", "C", "abc", "A", "B", "C"],
            [word.text for word in sentence],
        )

    def test_dates(self):
        """Test expansion of dates"""
        text = "1/4/1999 vs. 4/1/1999"
        sentence = next(sentences(text, lang="en_US"))

        # Expected: month/day/year reading of both dates, and "vs." expanded
        # to "versus".
        self.assertEqual(
            [
                "January",
                "fourth",
                ",",
                "nineteen",
                "ninety",
                "nine",
                "versus",
                "April",
                "first",
                ",",
                "nineteen",
                "ninety",
                "nine",
            ],
            [word.text for word in sentence],
        )

    def test_ordinals(self):
        """Test parsing of ordinal numbers"""
        text = "1st, 2nd, 3rd, 4th, 5th, 23rd, 32nd, 44th, 121st, 5,111st."
        sentence = next(sentences(text, lang="en_US"))

        # Expected: each ordinal verbalized; note the "," token between
        # "thousand" and "one" in the expansion of "5,111st".
        self.assertEqual(
            [
                "first",
                ",",
                "second",
                ",",
                "third",
                ",",
                "fourth",
                ",",
                "fifth",
                ",",
                "twenty",
                "third",
                ",",
                "thirty",
                "second",
                ",",
                "forty",
                "fourth",
                ",",
                "one",
                "hundred",
                "and",
                "twenty",
                "first",
                ",",
                "five",
                "thousand",
                ",",
                "one",
                "hundred",
                "and",
                "eleventh",
                ".",
            ],
            [word.text for word in sentence],
        )
# -----------------------------------------------------------------------------
# Run the test suite only when this file is executed directly as a script;
# importing the module (e.g. from a test runner) must not trigger it.
if __name__ == "__main__":
    unittest.main()