397 lines
12 KiB
Python
397 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""Tests for SSML"""
|
||
import sys
|
||
import unittest
|
||
|
||
from gruut import sentences
|
||
from gruut.resources import _DIR
|
||
from gruut.utils import print_graph
|
||
|
||
|
||
class SSMLTestCase(unittest.TestCase):
    """Check how ``sentences(..., ssml=True)`` interprets SSML markup.

    Each test feeds one SSML document through ``sentences`` and compares
    selected attributes of every resulting word against a literal table.
    """

    @staticmethod
    def _word_rows(ssml_text, *fields):
        """Collect one tuple per word, in sentence order.

        Args:
            ssml_text: SSML document handed to ``sentences`` with ``ssml=True``.
            *fields: names of the word attributes to read, in output order.

        Returns:
            A list of tuples; each tuple holds the requested attributes of
            one word, in the order the words are produced.
        """
        rows = []
        for sentence in sentences(ssml_text, ssml=True):
            for word in sentence:
                rows.append(tuple(getattr(word, field) for field in fields))
        return rows

    def test_wikipedia_example(self):
        """Wikipedia sample: each <s>/<voice> pair sets lang and voice."""
        document = """<?xml version="1.0"?>
<speak xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:dc="http://purl.org/dc/elements/1.1/"
       version="1.0">
  <metadata>
    <dc:title xml:lang="en">Telephone Menu: Level 1</dc:title>
  </metadata>

  <p>
    <s xml:lang="en-US">
      <voice name="David" gender="male" age="25">
        For English, press <emphasis>one</emphasis>.
      </voice>
    </s>
    <s xml:lang="es-MX">
      <voice name="Miguel" gender="male" age="25">
        Para español, oprima el <emphasis>dos</emphasis>.
      </voice>
    </s>
  </p>

</speak>"""

        expected = [
            (0, 0, "en-US", "David", "For"),
            (0, 1, "en-US", "David", "English"),
            (0, 2, "en-US", "David", ","),
            (0, 3, "en-US", "David", "press"),
            (0, 4, "en-US", "David", "one"),
            (0, 5, "en-US", "David", "."),
            (1, 0, "es-MX", "Miguel", "Para"),
            (1, 1, "es-MX", "Miguel", "español"),
            (1, 2, "es-MX", "Miguel", ","),
            (1, 3, "es-MX", "Miguel", "oprima"),
            (1, 4, "es-MX", "Miguel", "el"),
            (1, 5, "es-MX", "Miguel", "dos"),
            (1, 6, "es-MX", "Miguel", "."),
        ]

        actual = self._word_rows(
            document, "sent_idx", "idx", "lang", "voice", "text"
        )
        self.assertEqual(actual, expected)

    def test_lang_s(self):
        """xml:lang on <s> applies to that sentence, including its date."""
        document = """<?xml version="1.0" encoding="ISO-8859-1"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">
  <s>Today, 2/1/2000.</s>
  <!-- Today, February first two thousand -->
  <s xml:lang="it">Un mese fà, 2/1/2000.</s>
  <!-- Un mese fà, il due gennaio duemila -->
  <!-- One month ago, the second of January two thousand -->
</speak>"""

        expected = [
            (0, 0, "en-US", "Today"),
            (0, 1, "en-US", ","),
            (0, 2, "en-US", "February"),
            (0, 3, "en-US", "first"),
            (0, 4, "en-US", ","),
            (0, 5, "en-US", "two"),
            (0, 6, "en-US", "thousand"),
            (0, 7, "en-US", "."),
            (1, 0, "it", "Un"),
            (1, 1, "it", "mese"),
            (1, 2, "it", "fà"),
            (1, 3, "it", ","),
            # no "il"
            (1, 4, "it", "due"),
            (1, 5, "it", "gennaio"),
            (1, 6, "it", "duemila"),
            (1, 7, "it", "."),
        ]

        actual = self._word_rows(document, "sent_idx", "idx", "lang", "text")
        self.assertEqual(actual, expected)

    def test_phoneme(self):
        """<phoneme ph="..."> supplies the phonemes of the wrapped word."""
        document = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">
  <phoneme alphabet="ipa" ph="təmei̥ɾou̥"> tomato </phoneme>
  <!-- This is an example of IPA using character entities -->
  <!-- Because many platform/browser/text editor combinations do not
       correctly cut and paste Unicode text, this example uses the entity
       escape versions of the IPA characters. Normally, one would directly
       use the UTF-8 representation of these symbols: "təmei̥ɾou̥". -->
</speak>"""

        expected = [
            (0, 0, "en-US", "tomato", ["t", "ə", "m", "e", "i̥", "ɾ", "o", "u̥"]),
        ]

        actual = self._word_rows(
            document, "sent_idx", "idx", "lang", "text", "phonemes"
        )
        self.assertEqual(actual, expected)

    def test_sentences(self):
        """Each <s> element becomes its own sentence with fresh word indexes."""
        document = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">
  <p>
    <s>This is the first sentence of the paragraph.</s>
    <s>Here's another sentence.</s>
  </p>
</speak>"""

        expected = [
            (0, 0, "This"),
            (0, 1, "is"),
            (0, 2, "the"),
            (0, 3, "first"),
            (0, 4, "sentence"),
            (0, 5, "of"),
            (0, 6, "the"),
            (0, 7, "paragraph"),
            (0, 8, "."),
            (1, 0, "Here's"),
            (1, 1, "another"),
            (1, 2, "sentence"),
            (1, 3, "."),
        ]

        actual = self._word_rows(document, "sent_idx", "idx", "text")
        self.assertEqual(actual, expected)

    def test_token(self):
        """<token>/<w> contents stay whole; other zh-CN text splits per character."""

        # NOTE: Added full stops
        document = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="zh-CN">

  <!-- The Nanjing Changjiang River Bridge -->
  <token>南京市</token><token>长江大桥</token>。
  <!-- The mayor of Nanjing city, Jiang Daqiao -->
  南京市长<w>江大桥</w>。
  <!-- Shanghai is a metropolis -->
  上海是个<w>大都会</w>。
  <!-- Most Shanghainese will say something like that -->
  上海人<w>大都</w>会那么说。
</speak>"""

        expected = [
            (0, 0, "南京市"),
            (0, 1, "长江大桥"),
            (0, 2, "。"),
            (1, 0, "南"),
            (1, 1, "京"),
            (1, 2, "市"),
            (1, 3, "长"),
            (1, 4, "江大桥"),
            (1, 5, "。"),
            (2, 0, "上"),
            (2, 1, "海"),
            (2, 2, "是"),
            (2, 3, "个"),
            (2, 4, "大都会"),
            (2, 5, "。"),
            (3, 0, "上"),
            (3, 1, "海"),
            (3, 2, "人"),
            (3, 3, "大都"),
            (3, 4, "会"),
            (3, 5, "那"),
            (3, 6, "么"),
            (3, 7, "说"),
            (3, 8, "。"),
        ]

        actual = self._word_rows(document, "sent_idx", "idx", "text")
        self.assertEqual(actual, expected)

    def test_sub(self):
        """<sub alias="..."> yields the alias words instead of the wrapped text."""
        document = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">
  <sub alias="World Wide Web Consortium">W3C</sub> is an international community
  <!-- World Wide Web Consortium -->
</speak>"""

        expected = [
            (0, 0, "World"),
            (0, 1, "Wide"),
            (0, 2, "Web"),
            (0, 3, "Consortium"),
            (0, 4, "is"),
            (0, 5, "an"),
            (0, 6, "international"),
            (0, 7, "community"),
        ]

        actual = self._word_rows(document, "sent_idx", "idx", "text")
        self.assertEqual(actual, expected)

    def test_lang_element(self):
        """xml:lang on <w> or <lang> changes the language of enclosed words only."""
        document = """<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">
  The French word for cat is <w xml:lang="fr">chat</w>.
  He prefers to eat pasta that is <lang xml:lang="it">al dente</lang>.
</speak>"""

        expected = [
            (0, 0, "en-US", "The"),
            (0, 1, "en-US", "French"),
            (0, 2, "en-US", "word"),
            (0, 3, "en-US", "for"),
            (0, 4, "en-US", "cat"),
            (0, 5, "en-US", "is"),
            (0, 6, "fr", "chat"),
            (0, 7, "en-US", "."),
            (1, 0, "en-US", "He"),
            (1, 1, "en-US", "prefers"),
            (1, 2, "en-US", "to"),
            (1, 3, "en-US", "eat"),
            (1, 4, "en-US", "pasta"),
            (1, 5, "en-US", "that"),
            (1, 6, "en-US", "is"),
            (1, 7, "it", "al"),
            (1, 8, "it", "dente"),
            (1, 9, "en-US", "."),
        ]

        actual = self._word_rows(document, "sent_idx", "idx", "lang", "text")
        self.assertEqual(actual, expected)

    def test_lexicon(self):
        """Inline <lexicon> entries apply inside <lookup>, selected by word role."""
        document = """<?xml version="1.0"?>
<speak version="1.1"
       xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">

  <lexicon xml:id="test" alphabet="ipa">
    <lexeme>
      <grapheme>
        tomato
      </grapheme>
      <phoneme>
        <!-- Individual phonemes are separated by whitespace -->
        t ə m ˈɑ t oʊ
      </phoneme>
    </lexeme>
    <lexeme role="fake-role">
      <grapheme>
        tomato
      </grapheme>
      <phoneme>
        <!-- Made up pronunciation for fake word role -->
        t ə m ˈi t oʊ
      </phoneme>
    </lexeme>
  </lexicon>

  <w>tomato</w>
  <lookup ref="test">
    <w>tomato</w>
    <w role="fake-role">tomato</w>
  </lookup>
</speak>"""

        expected = [
            # Outside <lookup>: pronunciation not taken from the inline lexicon
            (0, 0, ["t", "ə", "m", "ˈeɪ", "t", "oʊ"]),
            # Inside <lookup>, no role: first lexeme
            (0, 1, ["t", "ə", "m", "ˈɑ", "t", "oʊ"]),
            # Inside <lookup>, role="fake-role": second lexeme
            (0, 2, ["t", "ə", "m", "ˈi", "t", "oʊ"]),
        ]

        actual = self._word_rows(document, "sent_idx", "idx", "phonemes")
        self.assertEqual(actual, expected)

    def test_lexicon_external(self):
        """A <lexicon> given by file:// URI is used inside <lookup>."""
        sample_lexicon = (_DIR.parent / "etc" / "sample_lexicon.xml").absolute()

        document = f"""<?xml version="1.0"?>
<speak version="1.1"
       xmlns="http://www.w3.org/2001/10/synthesis"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
       xml:lang="en-US">

  <lexicon xml:id="test" alphabet="ipa" uri="file://{sample_lexicon}" />

  <lookup ref="test">
    <w>tomato</w>
  </lookup>
</speak>"""

        expected = [(0, 0, ["t", "ə", "m", "e", "i̥", "ɾ", "o", "u̥"])]

        actual = self._word_rows(document, "sent_idx", "idx", "phonemes")
        self.assertEqual(actual, expected)
|
||
|
||
|
||
def print_graph_stderr(graph, root):
    """Render ``graph`` from ``root`` with ``print_graph``, writing to stderr.

    Args:
        graph: graph object passed through to ``print_graph`` unchanged.
        root: starting node passed through to ``print_graph`` unchanged.
    """

    def _to_stderr(*parts):
        # Same as print(), but aimed at stderr.
        print(*parts, file=sys.stderr)

    print_graph(graph, root, print_func=_to_stderr)
|
||
|
||
|
||
# -----------------------------------------------------------------------------
|
||
|
||
if __name__ == "__main__":
    # Run every test case in this module when the file is executed directly.
    unittest.main()
|