#!/usr/bin/env python3
"""Unit tests for SSML input processed by ``gruut.sentences(text, ssml=True)``."""
import sys
import unittest

from gruut import sentences
from gruut.resources import _DIR
from gruut.utils import print_graph


class SSMLTestCase(unittest.TestCase):
    """Checks the words produced by ``sentences(..., ssml=True)``.

    Every test flattens the returned sentences into a list of per-word tuples
    and compares that list with a literal expectation.

    NOTE(review): none of the input strings below contains XML markup, yet
    several expectations rely on per-word language, voice or phoneme values.
    The SSML tags may have been lost from this copy of the file -- confirm
    against the upstream source before relying on these tests.
    """

    def test_wikipedia_example(self):
        """Two sentences are expected, each with its own language and voice."""
        text = (
            " Telephone Menu: Level 1\n"
            "\n"
            "For English, press one. Para español, oprima el dos.\n"
            "\n"
        )

        actual = [
            (word.sent_idx, word.idx, word.lang, word.voice, word.text)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "en-US", "David", "For"),
            (0, 1, "en-US", "David", "English"),
            (0, 2, "en-US", "David", ","),
            (0, 3, "en-US", "David", "press"),
            (0, 4, "en-US", "David", "one"),
            (0, 5, "en-US", "David", "."),
            (1, 0, "es-MX", "Miguel", "Para"),
            (1, 1, "es-MX", "Miguel", "español"),
            (1, 2, "es-MX", "Miguel", ","),
            (1, 3, "es-MX", "Miguel", "oprima"),
            (1, 4, "es-MX", "Miguel", "el"),
            (1, 5, "es-MX", "Miguel", "dos"),
            (1, 6, "es-MX", "Miguel", "."),
        ]
        self.assertEqual(actual, expected)

    def test_lang_s(self):
        """The same date is expected to be verbalized per sentence language."""
        text = """ Today, 2/1/2000. Un mese fà, 2/1/2000. """

        actual = [
            (word.sent_idx, word.idx, word.lang, word.text)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "en-US", "Today"),
            (0, 1, "en-US", ","),
            (0, 2, "en-US", "February"),
            (0, 3, "en-US", "first"),
            (0, 4, "en-US", ","),
            (0, 5, "en-US", "two"),
            (0, 6, "en-US", "thousand"),
            (0, 7, "en-US", "."),
            (1, 0, "it", "Un"),
            (1, 1, "it", "mese"),
            (1, 2, "it", "fà"),
            (1, 3, "it", ","),
            # no "il"
            (1, 4, "it", "due"),
            (1, 5, "it", "gennaio"),
            (1, 6, "it", "duemila"),
            (1, 7, "it", "."),
        ]
        self.assertEqual(actual, expected)

    def test_phoneme(self):
        """A single word is expected to carry manually supplied phonemes."""
        text = """ tomato """

        actual = [
            (word.sent_idx, word.idx, word.lang, word.text, word.phonemes)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "en-US", "tomato", ["t", "ə", "m", "e", "i̥", "ɾ", "o", "u̥"])
        ]
        self.assertEqual(actual, expected)

    def test_sentences(self):
        """A paragraph is expected to be split into two indexed sentences."""
        text = (
            "\n"
            "\n"
            "This is the first sentence of the paragraph. Here's another sentence.\n"
            "\n"
        )

        actual = [
            (word.sent_idx, word.idx, word.text)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "This"),
            (0, 1, "is"),
            (0, 2, "the"),
            (0, 3, "first"),
            (0, 4, "sentence"),
            (0, 5, "of"),
            (0, 6, "the"),
            (0, 7, "paragraph"),
            (0, 8, "."),
            (1, 0, "Here's"),
            (1, 1, "another"),
            (1, 2, "sentence"),
            (1, 3, "."),
        ]
        self.assertEqual(actual, expected)

    def test_token(self):
        """Explicit tokenization: word boundaries expected for Chinese text."""
        # NOTE: full stops were added to the example text
        text = """ 南京市长江大桥 南京市长江大桥 上海是个大都会 上海人大都会那么说。 """

        actual = [
            (word.sent_idx, word.idx, word.text)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "南京市"),
            (0, 1, "长江大桥"),
            (0, 2, "。"),
            (1, 0, "南"),
            (1, 1, "京"),
            (1, 2, "市"),
            (1, 3, "长"),
            (1, 4, "江大桥"),
            (1, 5, "。"),
            (2, 0, "上"),
            (2, 1, "海"),
            (2, 2, "是"),
            (2, 3, "个"),
            (2, 4, "大都会"),
            (2, 5, "。"),
            (3, 0, "上"),
            (3, 1, "海"),
            (3, 2, "人"),
            (3, 3, "大都"),
            (3, 4, "会"),
            (3, 5, "那"),
            (3, 6, "么"),
            (3, 7, "说"),
            (3, 8, "。"),
        ]
        self.assertEqual(actual, expected)

    def test_sub(self):
        """"W3C" is expected to be replaced by its spoken expansion."""
        text = """ W3C is an international community """

        actual = [
            (word.sent_idx, word.idx, word.text)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "World"),
            (0, 1, "Wide"),
            (0, 2, "Web"),
            (0, 3, "Consortium"),
            (0, 4, "is"),
            (0, 5, "an"),
            (0, 6, "international"),
            (0, 7, "community"),
        ]
        self.assertEqual(actual, expected)

    def test_lang_element(self):
        """Individual words inside a sentence are expected to switch language."""
        text = (
            " The French word for cat is chat."
            " He prefers to eat pasta that is al dente. \n"
        )

        actual = [
            (word.sent_idx, word.idx, word.lang, word.text)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, "en-US", "The"),
            (0, 1, "en-US", "French"),
            (0, 2, "en-US", "word"),
            (0, 3, "en-US", "for"),
            (0, 4, "en-US", "cat"),
            (0, 5, "en-US", "is"),
            (0, 6, "fr", "chat"),
            (0, 7, "en-US", "."),
            (1, 0, "en-US", "He"),
            (1, 1, "en-US", "prefers"),
            (1, 2, "en-US", "to"),
            (1, 3, "en-US", "eat"),
            (1, 4, "en-US", "pasta"),
            (1, 5, "en-US", "that"),
            (1, 6, "en-US", "is"),
            (1, 7, "it", "al"),
            (1, 8, "it", "dente"),
            (1, 9, "en-US", "."),
        ]
        self.assertEqual(actual, expected)

    def test_lexicon(self):
        """Three pronunciations of "tomato" are expected, one per word."""
        text = """ tomato t ə m ˈɑ t oʊ tomato t ə m ˈi t oʊ tomato tomato tomato """

        actual = [
            (word.sent_idx, word.idx, word.phonemes)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [
            (0, 0, ["t", "ə", "m", "ˈeɪ", "t", "oʊ"]),
            (0, 1, ["t", "ə", "m", "ˈɑ", "t", "oʊ"]),
            (0, 2, ["t", "ə", "m", "ˈi", "t", "oʊ"]),
        ]
        self.assertEqual(actual, expected)

    def test_lexicon_external(self):
        """A pronunciation of "tomato" is expected with an external lexicon file."""
        # Absolute path to etc/sample_lexicon.xml next to the resources directory.
        # NOTE(review): this path is never interpolated into the f-string below;
        # presumably the markup that referenced it is missing -- confirm.
        lexicon_path = (_DIR.parent / "etc" / "sample_lexicon.xml").absolute()

        text = f""" tomato """

        actual = [
            (word.sent_idx, word.idx, word.phonemes)
            for sentence in sentences(text, ssml=True)
            for word in sentence
        ]

        expected = [(0, 0, ["t", "ə", "m", "e", "i̥", "ɾ", "o", "u̥"])]
        self.assertEqual(actual, expected)


def print_graph_stderr(graph, root):
    """Debug helper: call ``print_graph`` with output directed to stderr."""

    def _to_stderr(*parts):
        # Same positional pass-through as print(), but targeting sys.stderr.
        print(*parts, file=sys.stderr)

    print_graph(graph, root, print_func=_to_stderr)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    unittest.main()