ai-content-maker/.venv/Lib/site-packages/tests/test_ssml.py

397 lines
12 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
#!/usr/bin/env python3
"""Tests for SSML"""
import sys
import unittest
from gruut import sentences
from gruut.resources import _DIR
from gruut.utils import print_graph
class SSMLTestCase(unittest.TestCase):
    """Test cases for SSML input handled by ``gruut.sentences(..., ssml=True)``.

    Most inputs are adapted from the examples in the W3C SSML 1.1
    specification. Each test flattens the resulting sentences into one tuple
    per word and compares against the expected tokenization.
    """

    @staticmethod
    def _word_fields(text, *fields):
        """Process ``text`` as SSML and flatten the result to per-word tuples.

        Args:
            text: SSML document to pass to ``sentences`` with ``ssml=True``.
            *fields: Names of the word attributes to collect, in order
                (e.g. ``"sent_idx"``, ``"idx"``, ``"text"``).

        Returns:
            A list with one tuple per word, in sentence order, holding the
            requested attribute values.
        """
        return [
            tuple(getattr(word, field) for field in fields)
            for sent in sentences(text, ssml=True)
            for word in sent
        ]

    def test_wikipedia_example(self):
        """Test SSML example from Wikipedia"""
        text = """<?xml version="1.0"?>
<speak xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:dc="http://purl.org/dc/elements/1.1/"
version="1.0">
<metadata>
<dc:title xml:lang="en">Telephone Menu: Level 1</dc:title>
</metadata>
<p>
<s xml:lang="en-US">
<voice name="David" gender="male" age="25">
For English, press <emphasis>one</emphasis>.
</voice>
</s>
<s xml:lang="es-MX">
<voice name="Miguel" gender="male" age="25">
Para español, oprima el <emphasis>dos</emphasis>.
</voice>
</s>
</p>
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "lang", "voice", "text")

        # <metadata> contributes no words; each <s> keeps its own lang/voice.
        self.assertEqual(
            results,
            [
                (0, 0, "en-US", "David", "For"),
                (0, 1, "en-US", "David", "English"),
                (0, 2, "en-US", "David", ","),
                (0, 3, "en-US", "David", "press"),
                (0, 4, "en-US", "David", "one"),
                (0, 5, "en-US", "David", "."),
                (1, 0, "es-MX", "Miguel", "Para"),
                (1, 1, "es-MX", "Miguel", "español"),
                (1, 2, "es-MX", "Miguel", ","),
                (1, 3, "es-MX", "Miguel", "oprima"),
                (1, 4, "es-MX", "Miguel", "el"),
                (1, 5, "es-MX", "Miguel", "dos"),
                (1, 6, "es-MX", "Miguel", "."),
            ],
        )

    def test_lang_s(self):
        """Test lang on <s>"""
        text = """<?xml version="1.0" encoding="ISO-8859-1"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<s>Today, 2/1/2000.</s>
<!-- Today, February first two thousand -->
<s xml:lang="it">Un mese fà, 2/1/2000.</s>
<!-- Un mese fà, il due gennaio duemila -->
<!-- One month ago, the second of January two thousand -->
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "lang", "text")

        # The same date string is verbalized month-first for en-US and
        # day-first for Italian.
        self.assertEqual(
            results,
            [
                (0, 0, "en-US", "Today"),
                (0, 1, "en-US", ","),
                (0, 2, "en-US", "February"),
                (0, 3, "en-US", "first"),
                (0, 4, "en-US", ","),
                (0, 5, "en-US", "two"),
                (0, 6, "en-US", "thousand"),
                (0, 7, "en-US", "."),
                (1, 0, "it", "Un"),
                (1, 1, "it", "mese"),
                (1, 2, "it", "fà"),
                (1, 3, "it", ","),
                # no "il"
                (1, 4, "it", "due"),
                (1, 5, "it", "gennaio"),
                (1, 6, "it", "duemila"),
                (1, 7, "it", "."),
            ],
        )

    def test_phoneme(self):
        """Test manual phoneme insertion"""
        text = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<phoneme alphabet="ipa" ph="t&#x259;mei&#x325;&#x27E;ou&#x325;"> tomato </phoneme>
<!-- This is an example of IPA using character entities -->
<!-- Because many platform/browser/text editor combinations do not
correctly cut and paste Unicode text, this example uses the entity
escape versions of the IPA characters. Normally, one would directly
use the UTF-8 representation of these symbols: "təmei̥ɾou̥". -->
</speak>"""

        results = self._word_fields(
            text, "sent_idx", "idx", "lang", "text", "phonemes"
        )

        # "i" and "u" each carry U+0325 (combining ring below), matching the
        # &#x325; entities in the ph attribute. They are spelled with escapes
        # so the combining marks cannot be silently lost by an editor.
        self.assertEqual(
            results,
            [
                (
                    0,
                    0,
                    "en-US",
                    "tomato",
                    ["t", "ə", "m", "e", "i\u0325", "ɾ", "o", "u\u0325"],
                )
            ],
        )

    def test_sentences(self):
        """Test <s>"""
        text = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<p>
<s>This is the first sentence of the paragraph.</s>
<s>Here's another sentence.</s>
</p>
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "text")

        self.assertEqual(
            results,
            [
                (0, 0, "This"),
                (0, 1, "is"),
                (0, 2, "the"),
                (0, 3, "first"),
                (0, 4, "sentence"),
                (0, 5, "of"),
                (0, 6, "the"),
                (0, 7, "paragraph"),
                (0, 8, "."),
                (1, 0, "Here's"),
                (1, 1, "another"),
                (1, 2, "sentence"),
                (1, 3, "."),
            ],
        )

    def test_token(self):
        """Test explicit tokenization"""
        # NOTE: Added full stops
        text = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="zh-CN">
<!-- The Nanjing Changjiang River Bridge -->
<token>南京市</token><token>长江大桥</token>。
<!-- The mayor of Nanjing city, Jiang Daqiao -->
南京市长<w>江大桥</w>。
<!-- Shanghai is a metropolis -->
上海是个<w>大都会</w>。
<!-- Most Shanghainese will say something like that -->
上海人<w>大都</w>会那么说。
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "text")

        # Text inside <token>/<w> stays a single word; characters outside
        # those elements come out one per word.
        self.assertEqual(
            results,
            [
                (0, 0, "南京市"),
                (0, 1, "长江大桥"),
                (0, 2, "。"),
                (1, 0, "南"),
                (1, 1, "京"),
                (1, 2, "市"),
                (1, 3, "长"),
                (1, 4, "江大桥"),
                (1, 5, "。"),
                (2, 0, "上"),
                (2, 1, "海"),
                (2, 2, "是"),
                (2, 3, "个"),
                (2, 4, "大都会"),
                (2, 5, "。"),
                (3, 0, "上"),
                (3, 1, "海"),
                (3, 2, "人"),
                (3, 3, "大都"),
                (3, 4, "会"),
                (3, 5, "那"),
                (3, 6, "么"),
                (3, 7, "说"),
                (3, 8, "。"),
            ],
        )

    def test_sub(self):
        """Test <sub>"""
        text = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<sub alias="World Wide Web Consortium">W3C</sub> is an international community
<!-- World Wide Web Consortium -->
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "text")

        # The alias text replaces the element content ("W3C").
        self.assertEqual(
            results,
            [
                (0, 0, "World"),
                (0, 1, "Wide"),
                (0, 2, "Web"),
                (0, 3, "Consortium"),
                (0, 4, "is"),
                (0, 5, "an"),
                (0, 6, "international"),
                (0, 7, "community"),
            ],
        )

    def test_lang_element(self):
        """Test <lang>"""
        text = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
The French word for cat is <w xml:lang="fr">chat</w>.
He prefers to eat pasta that is <lang xml:lang="it">al dente</lang>.
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "lang", "text")

        # Only the words inside <w xml:lang>/<lang> switch language; the
        # trailing full stop goes back to the document language.
        self.assertEqual(
            results,
            [
                (0, 0, "en-US", "The"),
                (0, 1, "en-US", "French"),
                (0, 2, "en-US", "word"),
                (0, 3, "en-US", "for"),
                (0, 4, "en-US", "cat"),
                (0, 5, "en-US", "is"),
                (0, 6, "fr", "chat"),
                (0, 7, "en-US", "."),
                (1, 0, "en-US", "He"),
                (1, 1, "en-US", "prefers"),
                (1, 2, "en-US", "to"),
                (1, 3, "en-US", "eat"),
                (1, 4, "en-US", "pasta"),
                (1, 5, "en-US", "that"),
                (1, 6, "en-US", "is"),
                (1, 7, "it", "al"),
                (1, 8, "it", "dente"),
                (1, 9, "en-US", "."),
            ],
        )

    def test_lexicon(self):
        """Test <lexicon> and <lookup>"""
        text = """<?xml version="1.0"?>
<speak version="1.1"
xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<lexicon xml:id="test" alphabet="ipa">
<lexeme>
<grapheme>
tomato
</grapheme>
<phoneme>
<!-- Individual phonemes are separated by whitespace -->
t ə m ˈɑ t oʊ
</phoneme>
</lexeme>
<lexeme role="fake-role">
<grapheme>
tomato
</grapheme>
<phoneme>
<!-- Made up pronunciation for fake word role -->
t ə m ˈi t oʊ
</phoneme>
</lexeme>
</lexicon>
<w>tomato</w>
<lookup ref="test">
<w>tomato</w>
<w role="fake-role">tomato</w>
</lookup>
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "phonemes")

        self.assertEqual(
            results,
            [
                # Outside <lookup>: not taken from the inline lexicon.
                # NOTE(review): presumably the built-in en-US pronunciation;
                # confirm the final "oʊ" against gruut's lexicon.
                (0, 0, ["t", "ə", "m", "ˈeɪ", "t", "oʊ"]),
                # Inside <lookup ref="test">: first lexeme (no role).
                (0, 1, ["t", "ə", "m", "ˈɑ", "t", "oʊ"]),
                # Inside <lookup ref="test"> with role="fake-role".
                (0, 2, ["t", "ə", "m", "ˈi", "t", "oʊ"]),
            ],
        )

    def test_lexicon_external(self):
        """Test <lexicon> from URI"""
        lexicon_path = (_DIR.parent / "etc" / "sample_lexicon.xml").absolute()
        text = f"""<?xml version="1.0"?>
<speak version="1.1"
xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<lexicon xml:id="test" alphabet="ipa" uri="file://{lexicon_path}" />
<lookup ref="test">
<w>tomato</w>
</lookup>
</speak>"""

        results = self._word_fields(text, "sent_idx", "idx", "phonemes")

        # NOTE(review): the expected phonemes come from etc/sample_lexicon.xml,
        # which is assumed to hold the "təmei̥ɾou̥" pronunciation (i/u with
        # U+0325, combining ring below) -- confirm against that file.
        self.assertEqual(
            results,
            [(0, 0, ["t", "ə", "m", "e", "i\u0325", "ɾ", "o", "u\u0325"])],
        )
def print_graph_stderr(graph, root):
    """Debug helper: hand ``graph`` and ``root`` to ``print_graph``, sending output to stderr."""

    def _to_stderr(*parts):
        # Same as print(), but targets stderr instead of stdout.
        print(*parts, file=sys.stderr)

    print_graph(graph, root, print_func=_to_stderr)
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    # Run every test case in this module when the file is executed directly.
    unittest.main()