ai-content-maker/.venv/Lib/site-packages/gruut_ipa/constants.py

711 lines
23 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
"""Enums, vowels, and consonants for gruut-ipa"""
import typing
import unicodedata
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
# Directory containing this module, and the "data" directory beside it.
_DIR = Path(__file__).parent
_DATA_DIR = _DIR.joinpath("data")
# Language code -> language code it is treated as an alias of.
# NOTE(review): most entries map a short code to a regional one, but
# "pt-br" maps the other way round ("pt-br" -> "pt"); presumably intentional.
LANG_ALIASES = {
    "ar": "ar",
    "cs": "cs-cz",
    "de": "de-de",
    "en": "en-us",
    "es": "es-es",
    "fa": "fa",
    "fr": "fr-fr",
    "it": "it-it",
    "nl": "nl",
    "pt-br": "pt",
    "ru": "ru-ru",
    "sv": "sv-se",
    "sw": "sw",
}
class IPA(str, Enum):
    """International phonetic alphabet characters.

    Members are ``str`` instances, so they compare equal to their plain
    string values.

    Several members intentionally share a value and are therefore Enum
    aliases of the first member defined with that value:
    ``TONE_2`` is an alias of ``ACCENT_GRAVE`` (both ``"²"``) and
    ``BRACKET_PHONEMIC_RIGHT`` is an alias of ``BRACKET_PHONEMIC_LEFT``
    (both ``"/"``).
    """

    STRESS_PRIMARY = "\u02C8"  # ˈ
    STRESS_SECONDARY = "\u02CC"  # ˌ

    ACCENT_ACUTE = "'"
    ACCENT_GRAVE = "²"

    LONG = "\u02D0"  # ː
    HALF_LONG = "\u02D1"  # eˑ
    EXTRA_SHORT = "\u0306"  # ə̆
    NASAL = "\u0303"  # ẽ
    RAISED = "\u031D"  # r̝
    TIE_ABOVE = "\u0361"  # ͡
    TIE_BELOW = "\u035C"  # ͜

    SYLLABIC = "\u0329"
    NON_SYLLABIC = "\u032F"

    BREAK_SYLLABLE = "."
    BREAK_MINOR = "|"
    BREAK_MAJOR = "\u2016"  # ‖
    BREAK_WORD = "#"

    INTONATION_RISING = "\u2197"  # ↗
    INTONATION_FALLING = "\u2198"  # ↘

    # Numbered tones are superscript digits.  Tones 4-9 are written as
    # escapes: if they were all empty strings they would collapse into a
    # single Enum alias and make is_tone("") return True.
    TONE_1 = "¹"
    TONE_2 = "²"
    TONE_3 = "³"
    TONE_4 = "\u2074"  # ⁴
    TONE_5 = "\u2075"  # ⁵
    TONE_6 = "\u2076"  # ⁶
    TONE_7 = "\u2077"  # ⁷
    TONE_8 = "\u2078"  # ⁸
    TONE_9 = "\u2079"  # ⁹

    TONE_EXTRA_HIGH = "˥"
    TONE_HIGH = "˦"
    TONE_MID = "˧"
    TONE_LOW = "˨"
    TONE_EXTRA_LOW = "˩"

    TONE_GLOTTALIZED = "ˀ"
    TONE_SHORT = "ʔ"

    BRACKET_PHONETIC_LEFT = "["
    BRACKET_PHONETIC_RIGHT = "]"
    BRACKET_PHONEMIC_LEFT = "/"
    BRACKET_PHONEMIC_RIGHT = "/"
    BRACKET_PROSODIC_LEFT = "{"
    BRACKET_PROSODIC_RIGHT = "}"
    BRACKET_OPTIONAL_LEFT = "("
    BRACKET_OPTIONAL_RIGHT = ")"

    @staticmethod
    def is_long(codepoint: str) -> bool:
        """True if elongated symbol"""
        return codepoint == IPA.LONG

    @staticmethod
    def is_nasal(codepoint: str) -> bool:
        """True if nasalated diacritic"""
        return codepoint == IPA.NASAL

    @staticmethod
    def is_raised(codepoint: str) -> bool:
        """True if raised diacritic"""
        return codepoint == IPA.RAISED

    @staticmethod
    def is_stress(codepoint: str) -> bool:
        """True if primary/secondary stress symbol"""
        return codepoint in (IPA.STRESS_PRIMARY, IPA.STRESS_SECONDARY)

    @staticmethod
    def is_accent(codepoint: str) -> bool:
        """True if accent symbol"""
        return codepoint in {IPA.ACCENT_ACUTE, IPA.ACCENT_GRAVE}

    @staticmethod
    def is_tie(codepoint: str) -> bool:
        """True if above/below tie symbol"""
        return codepoint in (IPA.TIE_ABOVE, IPA.TIE_BELOW)

    @staticmethod
    def is_bracket(codepoint: str) -> bool:
        """True if any IPA bracket symbol"""
        return codepoint in {
            IPA.BRACKET_PHONETIC_LEFT,
            IPA.BRACKET_PHONETIC_RIGHT,
            IPA.BRACKET_PHONEMIC_LEFT,
            IPA.BRACKET_PHONEMIC_RIGHT,
            IPA.BRACKET_PROSODIC_LEFT,
            IPA.BRACKET_PROSODIC_RIGHT,
            IPA.BRACKET_OPTIONAL_LEFT,
            IPA.BRACKET_OPTIONAL_RIGHT,
        }

    @staticmethod
    def is_break(codepoint: str) -> bool:
        """True if any IPA break symbol"""
        return codepoint in {
            IPA.BREAK_SYLLABLE,
            IPA.BREAK_MINOR,
            IPA.BREAK_MAJOR,
            IPA.BREAK_WORD,
        }

    @staticmethod
    def is_intonation(codepoint: str) -> bool:
        """True if a rising or falling IPA intonation symbol"""
        return codepoint in {IPA.INTONATION_RISING, IPA.INTONATION_FALLING}

    @staticmethod
    def is_tone(codepoint: str) -> bool:
        """True if a numbered tone (1-9) or a tone-level letter.

        TONE_GLOTTALIZED and TONE_SHORT are not included in this check.
        """
        return codepoint in {
            IPA.TONE_1,
            IPA.TONE_2,
            IPA.TONE_3,
            IPA.TONE_4,
            IPA.TONE_5,
            IPA.TONE_6,
            IPA.TONE_7,
            IPA.TONE_8,
            IPA.TONE_9,
            IPA.TONE_EXTRA_HIGH,
            IPA.TONE_HIGH,
            IPA.TONE_MID,
            IPA.TONE_LOW,
            IPA.TONE_EXTRA_LOW,
        }

    @staticmethod
    def graphemes(codepoints: str) -> typing.List[str]:
        """Split a string into graphemes.

        The input is decomposed (NFD); every character with a non-zero
        Unicode combining class is attached to the character before it, and
        each resulting cluster is recomposed (NFC).  Returns an empty list
        for an empty string.
        """
        clusters: typing.List[str] = []
        current = ""

        for char in unicodedata.normalize("NFD", codepoints):
            if current and unicodedata.combining(char) == 0:
                # A new base character closes the cluster in progress
                clusters.append(unicodedata.normalize("NFC", current))
                current = char
            else:
                # Combining mark, or the very first character
                current += char

        if current:
            # Final grapheme
            clusters.append(unicodedata.normalize("NFC", current))

        return clusters

    @staticmethod
    def without_stress(codepoints: str, drop_accent: bool = True) -> str:
        """Return string without primary/secondary stress.

        When ``drop_accent`` is true (the default), accent symbols are
        removed as well.
        """
        return "".join(
            c
            for c in codepoints
            if (not IPA.is_stress(c) and (not drop_accent or not IPA.is_accent(c)))
        )
class Stress(str, Enum):
    """Stress level applied to a phoneme (members are also plain strings)."""

    SECONDARY = "secondary"
    PRIMARY = "primary"
class Accent(str, Enum):
    """Accent applied to a phoneme (members are also plain strings)."""

    ACUTE = "acute"  # written as '
    GRAVE = "grave"  # written as ²
class BreakType(str, Enum):
    """Kind of break/boundary (members are also plain strings)."""

    WORD = "word"  # written as '#'
    MINOR = "minor"  # written as |
    MAJOR = "major"  # written as ‖
class PhonemeLength(str, Enum):
    """How long a phoneme is spoken (members are also plain strings)."""

    SHORT = "short"  # marked with ˑ
    NORMAL = "normal"
    LONG = "long"  # marked with ː
# -----------------------------------------------------------------------------
class VowelHeight(str, Enum):
    """Vowel height, listed from close to open."""

    CLOSE = "close"
    NEAR_CLOSE = "near-close"
    CLOSE_MID = "close-mid"
    MID = "mid"
    OPEN_MID = "open-mid"
    NEAR_OPEN = "near-open"
    OPEN = "open"
class VowelPlacement(str, Enum):
    """Vowel placement, listed from front to back."""

    FRONT = "front"
    NEAR_FRONT = "near-front"
    CENTRAL = "central"
    NEAR_BACK = "near-back"
    BACK = "back"
@dataclass
class Vowel:
    """Description of a single vowel symbol."""

    # IPA text of the vowel
    ipa: str

    # Position in the vowel chart
    height: VowelHeight
    placement: VowelPlacement

    # True if the vowel is rounded
    rounded: bool

    # True if the vowel is nasalated
    nasalated: bool = False

    # Applied stress, if any
    stress: typing.Optional[Stress] = None

    # Spoken length
    length: PhonemeLength = PhonemeLength.NORMAL

    # IPA text of the vowel this one is an alias of, if any
    alias_of: typing.Optional[str] = None
# -----------------------------------------------------------------
# Vowels      Front   Near-Front   Central   Near-Back   Back
# -----------------------------------------------------------------
# Close       i/y                  ɨ/ʉ                   ɯ/u
# Near-Close          ɪ/ʏ                    ʊ
# Close-Mid   e/ø                  ɘ/ɵ                   ɤ/o
# Mid                              ə
# Open-Mid    ɛ/œ                  ɜ/ɞ                   ʌ/ɔ
# Near-Open   æ                    ɐ
# Open        a/ɶ                                        ɑ
# -----------------------------------------------------------------
# Table of known vowels, grouped by height (close -> open).
#
# Symbols that are easily lost in copy/paste are written as escapes; an
# empty ``ipa`` string would make several entries collide on the "" key of
# VOWELS below.  Precomposed (NFC) forms are used for nasalated vowels where
# one exists.
_VOWELS = [
    Vowel("i", VowelHeight.CLOSE, VowelPlacement.FRONT, False),
    Vowel("y", VowelHeight.CLOSE, VowelPlacement.FRONT, True),
    Vowel("ɨ", VowelHeight.CLOSE, VowelPlacement.CENTRAL, False),
    Vowel(
        "\u1D7B",  # ᵻ
        VowelHeight.CLOSE,
        VowelPlacement.CENTRAL,
        False,
        alias_of="ɨ",
    ),
    Vowel("ʉ", VowelHeight.CLOSE, VowelPlacement.CENTRAL, True),
    Vowel("ɯ", VowelHeight.CLOSE, VowelPlacement.BACK, False),
    Vowel("u", VowelHeight.CLOSE, VowelPlacement.BACK, True),
    #
    Vowel("ɪ", VowelHeight.NEAR_CLOSE, VowelPlacement.NEAR_FRONT, False),
    Vowel("ʏ", VowelHeight.NEAR_CLOSE, VowelPlacement.NEAR_FRONT, True),
    Vowel("ʊ", VowelHeight.NEAR_CLOSE, VowelPlacement.NEAR_BACK, True),
    #
    Vowel("e", VowelHeight.CLOSE_MID, VowelPlacement.FRONT, False),
    Vowel(
        "\u1EBD",  # ẽ
        VowelHeight.CLOSE_MID,
        VowelPlacement.FRONT,
        False,
        nasalated=True,
    ),
    Vowel("ø", VowelHeight.CLOSE_MID, VowelPlacement.FRONT, True),
    Vowel("ɘ", VowelHeight.CLOSE_MID, VowelPlacement.CENTRAL, False),
    Vowel("ɵ", VowelHeight.CLOSE_MID, VowelPlacement.CENTRAL, True),
    Vowel("ɤ", VowelHeight.CLOSE_MID, VowelPlacement.BACK, False),
    Vowel("o", VowelHeight.CLOSE_MID, VowelPlacement.BACK, True),
    #
    # Represented as a schwa too
    Vowel("ə", VowelHeight.MID, VowelPlacement.CENTRAL, False),
    #
    Vowel("ɛ", VowelHeight.OPEN_MID, VowelPlacement.FRONT, False),
    Vowel("œ", VowelHeight.OPEN_MID, VowelPlacement.FRONT, True),
    Vowel("ɜ", VowelHeight.OPEN_MID, VowelPlacement.CENTRAL, False),
    Vowel("ɞ", VowelHeight.OPEN_MID, VowelPlacement.CENTRAL, True),
    Vowel("ʌ", VowelHeight.OPEN_MID, VowelPlacement.BACK, False),
    Vowel("ɔ", VowelHeight.OPEN_MID, VowelPlacement.BACK, True),
    Vowel("ɔ̃", VowelHeight.OPEN_MID, VowelPlacement.BACK, True, nasalated=True),
    #
    Vowel("æ", VowelHeight.NEAR_OPEN, VowelPlacement.FRONT, False),
    Vowel("ɐ", VowelHeight.NEAR_OPEN, VowelPlacement.CENTRAL, False),
    #
    Vowel("a", VowelHeight.OPEN, VowelPlacement.FRONT, False),
    Vowel(
        "\u00E3",  # ã
        VowelHeight.OPEN,
        VowelPlacement.FRONT,
        False,
        nasalated=True,
    ),
    Vowel("ɶ", VowelHeight.OPEN, VowelPlacement.FRONT, True),
    Vowel("ɑ", VowelHeight.OPEN, VowelPlacement.BACK, False),
    Vowel("ɒ", VowelHeight.OPEN, VowelPlacement.BACK, True),
]

# Lookup of vowel by IPA text
VOWELS = {v.ipa: v for v in _VOWELS}
# -----------------------------------------------------------------------------
@dataclass
class Dipthong:
    """Pair of vowels forming a diphthong (class name keeps its historical spelling)."""

    # First vowel of the pair
    vowel1: Vowel

    # Second vowel of the pair
    vowel2: Vowel
# -----------------------------------------------------------------------------
@dataclass
class Schwa:
    """Description of a schwa (vowel-like sound)."""

    # IPA text of the schwa
    ipa: str

    # True if the schwa is r-coloured
    r_coloured: bool

    # Spoken length
    length: PhonemeLength = PhonemeLength.NORMAL

    # IPA text of the schwa this one is an alias of, if any
    alias_of: typing.Optional[str] = None


_SCHWAS = [
    Schwa("ə", False),
    Schwa("ɚ", True),
    Schwa("ɝ", True, alias_of="ɚ"),
]

# Lookup of schwa by IPA text
SCHWAS = {schwa.ipa: schwa for schwa in _SCHWAS}
# -----------------------------------------------------------------------------
class ConsonantType(str, Enum):
    """Manner of articulation of a consonant (members are also plain strings)."""

    NASAL = "nasal"
    PLOSIVE = "plosive"
    AFFRICATE = "affricate"
    FRICATIVE = "fricative"
    APPROXIMANT = "approximant"
    FLAP = "flap"
    TRILL = "trill"
    LATERAL_APPROXIMANT = "lateral-approximant"
class ConsonantPlace(str, Enum):
    """Place of articulation of a consonant, listed from lips to glottis."""

    BILABIAL = "bilabial"
    LABIO_DENTAL = "labio-dental"
    DENTAL = "dental"
    ALVEOLAR = "alveolar"
    POST_ALVEOLAR = "post-alveolar"
    RETROFLEX = "retroflex"
    PALATAL = "palatal"
    VELAR = "velar"
    UVULAR = "uvular"
    PHARYNGEAL = "pharyngeal"
    GLOTTAL = "glottal"
class ConsonantSoundsLike(str, Enum):
    """Family of sounds a consonant resembles; ``NONE`` is the empty string."""

    NONE = ""
    R = "r"
    G = "g"
    L = "l"
@dataclass
class Consonant:
    """Description of a single consonant symbol."""

    # IPA text of the consonant
    ipa: str

    # Manner and place of articulation
    type: ConsonantType
    place: ConsonantPlace

    # True if the consonant is voiced
    voiced: bool

    # True if the consonant is velarized
    velarized: bool = False

    # Family of sounds this consonant resembles
    sounds_like: ConsonantSoundsLike = ConsonantSoundsLike.NONE

    # Spoken length
    length: PhonemeLength = PhonemeLength.NORMAL

    # IPA text of the consonant this one is an alias of, if any
    alias_of: typing.Optional[str] = None
# --------------------------------------------------------------------------------------------------------------------------------------------
# Type Bilabial Labiodental Dental Alveolar Postalveolar Retroflex Palatal Velar Uvular Pharyngeal Glottal
# --------------------------------------------------------------------------------------------------------------------------------------------
# Nasal m ɱ n ɳ ɲ ŋ ɴ
# Plosive p/b t/d ʈ/ɖ c/ɟ k/ɡ q/ɢ ʡ ʔ
# Affricate p͡f/b͡v t̪͡s̪/b͡v̪ t͡s/d͡z t͡ʃ/d͡ʒ ʈ͡ʂ/ɖ͡ʐ t͡ɕ/d͡ʑ k͡x
# Fricative ɸ/β f/v θ/ð s/z ʃ/ʒ ʂ/ʐ ç/ʝ x/ɣ χ/ʁ ħ h ɦ
# Approximant w ʋ ɹ ɻ j ɰ
# Flap ⱱ ɾ ɽ
# Trill ʙ r ʀ
# Lateral App l ɭ ʎ ʟ
# --------------------------------------------------------------------------------------------------------------------------------------------
# Table of known consonants, grouped by type (manner of articulation).
#
# Every ``ipa`` string must be unique and non-empty: CONSONANTS below is
# keyed by it, so a repeated symbol silently replaces the earlier entry.
_CONSONANTS = [
    Consonant("m", ConsonantType.NASAL, ConsonantPlace.BILABIAL, True),
    Consonant("ɱ", ConsonantType.NASAL, ConsonantPlace.LABIO_DENTAL, True),
    Consonant("n", ConsonantType.NASAL, ConsonantPlace.ALVEOLAR, True),
    Consonant("ɳ", ConsonantType.NASAL, ConsonantPlace.RETROFLEX, True),
    Consonant("ɲ", ConsonantType.NASAL, ConsonantPlace.PALATAL, True),
    Consonant("ŋ", ConsonantType.NASAL, ConsonantPlace.VELAR, True),
    Consonant("ɴ", ConsonantType.NASAL, ConsonantPlace.UVULAR, True),
    #
    Consonant("p", ConsonantType.PLOSIVE, ConsonantPlace.BILABIAL, False),
    Consonant("b", ConsonantType.PLOSIVE, ConsonantPlace.BILABIAL, True),
    Consonant("t", ConsonantType.PLOSIVE, ConsonantPlace.ALVEOLAR, False),
    Consonant("d", ConsonantType.PLOSIVE, ConsonantPlace.ALVEOLAR, True),
    Consonant("ʈ", ConsonantType.PLOSIVE, ConsonantPlace.RETROFLEX, False),
    Consonant("ɖ", ConsonantType.PLOSIVE, ConsonantPlace.RETROFLEX, True),
    Consonant("c", ConsonantType.PLOSIVE, ConsonantPlace.PALATAL, False),
    Consonant("ɟ", ConsonantType.PLOSIVE, ConsonantPlace.PALATAL, True),
    Consonant("k", ConsonantType.PLOSIVE, ConsonantPlace.VELAR, False),
    Consonant(
        "ɡ",
        ConsonantType.PLOSIVE,
        ConsonantPlace.VELAR,
        True,
        sounds_like=ConsonantSoundsLike.G,
    ),
    # ASCII "g" is accepted as an alias of the IPA letter "ɡ" (U+0261)
    Consonant(
        "g",
        ConsonantType.PLOSIVE,
        ConsonantPlace.VELAR,
        True,
        sounds_like=ConsonantSoundsLike.G,
        alias_of="ɡ",
    ),
    Consonant(
        "q",
        ConsonantType.PLOSIVE,
        ConsonantPlace.UVULAR,
        False,
        sounds_like=ConsonantSoundsLike.G,
    ),
    Consonant(
        "ɢ",
        ConsonantType.PLOSIVE,
        ConsonantPlace.UVULAR,
        True,
        sounds_like=ConsonantSoundsLike.G,
    ),
    Consonant("ʡ", ConsonantType.PLOSIVE, ConsonantPlace.PHARYNGEAL, False),
    Consonant("ʔ", ConsonantType.PLOSIVE, ConsonantPlace.GLOTTAL, False),
    #
    Consonant("p͡f", ConsonantType.AFFRICATE, ConsonantPlace.LABIO_DENTAL, False),
    Consonant("b͡v", ConsonantType.AFFRICATE, ConsonantPlace.LABIO_DENTAL, True),
    Consonant("t̪͡s", ConsonantType.AFFRICATE, ConsonantPlace.DENTAL, False),
    # Voiced counterpart of t̪͡s.  This entry must not reuse "b͡v": doing so
    # would replace the labio-dental affricate above in CONSONANTS.
    # NOTE(review): symbol chosen by analogy with t̪͡s -- confirm.
    Consonant(
        "d\u032A\u0361z",  # d̪͡z
        ConsonantType.AFFRICATE,
        ConsonantPlace.DENTAL,
        True,
    ),
    Consonant("t͡s", ConsonantType.AFFRICATE, ConsonantPlace.ALVEOLAR, False),
    Consonant("d͡z", ConsonantType.AFFRICATE, ConsonantPlace.ALVEOLAR, True),
    Consonant("t͡ʃ", ConsonantType.AFFRICATE, ConsonantPlace.POST_ALVEOLAR, False),
    Consonant("d͡ʒ", ConsonantType.AFFRICATE, ConsonantPlace.POST_ALVEOLAR, True),
    Consonant("ʈ͡ʂ", ConsonantType.AFFRICATE, ConsonantPlace.RETROFLEX, False),
    Consonant("ɖ͡ʐ", ConsonantType.AFFRICATE, ConsonantPlace.RETROFLEX, True),
    Consonant("t͡ɕ", ConsonantType.AFFRICATE, ConsonantPlace.PALATAL, False),
    Consonant("d͡ʑ", ConsonantType.AFFRICATE, ConsonantPlace.PALATAL, True),
    Consonant("k͡x", ConsonantType.AFFRICATE, ConsonantPlace.VELAR, False),
    #
    Consonant("ɸ", ConsonantType.FRICATIVE, ConsonantPlace.BILABIAL, False),
    Consonant("β", ConsonantType.FRICATIVE, ConsonantPlace.BILABIAL, True),
    Consonant("f", ConsonantType.FRICATIVE, ConsonantPlace.LABIO_DENTAL, False),
    Consonant("v", ConsonantType.FRICATIVE, ConsonantPlace.LABIO_DENTAL, True),
    Consonant("θ", ConsonantType.FRICATIVE, ConsonantPlace.DENTAL, False),
    Consonant("ð", ConsonantType.FRICATIVE, ConsonantPlace.DENTAL, True),
    Consonant("s", ConsonantType.FRICATIVE, ConsonantPlace.ALVEOLAR, False),
    Consonant("z", ConsonantType.FRICATIVE, ConsonantPlace.ALVEOLAR, True),
    Consonant("ʃ", ConsonantType.FRICATIVE, ConsonantPlace.POST_ALVEOLAR, False),
    Consonant("ʒ", ConsonantType.FRICATIVE, ConsonantPlace.POST_ALVEOLAR, True),
    Consonant("ʂ", ConsonantType.FRICATIVE, ConsonantPlace.RETROFLEX, False),
    Consonant("ʐ", ConsonantType.FRICATIVE, ConsonantPlace.RETROFLEX, True),
    # Palatal fricatives are the voiceless/voiced pair ç/ʝ.  "ʐ" is retroflex
    # (listed above) and must not be repeated here, or it would replace the
    # retroflex entry in CONSONANTS.
    Consonant("ç", ConsonantType.FRICATIVE, ConsonantPlace.PALATAL, False),
    Consonant("ʝ", ConsonantType.FRICATIVE, ConsonantPlace.PALATAL, True),
    Consonant("x", ConsonantType.FRICATIVE, ConsonantPlace.VELAR, False),
    Consonant("ɣ", ConsonantType.FRICATIVE, ConsonantPlace.VELAR, True),
    Consonant("χ", ConsonantType.FRICATIVE, ConsonantPlace.UVULAR, False),
    Consonant(
        "ʁ",
        ConsonantType.FRICATIVE,
        ConsonantPlace.UVULAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant("ħ", ConsonantType.FRICATIVE, ConsonantPlace.PHARYNGEAL, False),
    Consonant("h", ConsonantType.FRICATIVE, ConsonantPlace.GLOTTAL, False),
    Consonant("ɦ", ConsonantType.FRICATIVE, ConsonantPlace.GLOTTAL, True),
    #
    Consonant("w", ConsonantType.APPROXIMANT, ConsonantPlace.BILABIAL, True),
    Consonant("ʋ", ConsonantType.APPROXIMANT, ConsonantPlace.LABIO_DENTAL, True),
    Consonant(
        "ɹ",
        ConsonantType.APPROXIMANT,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant(
        "ɻ",
        ConsonantType.APPROXIMANT,
        ConsonantPlace.RETROFLEX,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant("j", ConsonantType.APPROXIMANT, ConsonantPlace.PALATAL, True),
    Consonant("ɰ", ConsonantType.APPROXIMANT, ConsonantPlace.VELAR, True),
    #
    # Labio-dental flap, written as an escape so the symbol cannot be lost
    Consonant(
        "\u2C71",  # ⱱ
        ConsonantType.FLAP,
        ConsonantPlace.LABIO_DENTAL,
        True,
    ),
    Consonant(
        "ɾ",
        ConsonantType.FLAP,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant(
        "ɽ",
        ConsonantType.FLAP,
        ConsonantPlace.RETROFLEX,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    #
    Consonant("ʙ", ConsonantType.TRILL, ConsonantPlace.BILABIAL, True),
    Consonant(
        "r",
        ConsonantType.TRILL,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant(
        "ʀ",
        ConsonantType.TRILL,
        ConsonantPlace.UVULAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    #
    Consonant(
        "l",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.L,
    ),
    Consonant(
        "ɫ",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.ALVEOLAR,
        True,
        velarized=True,
        sounds_like=ConsonantSoundsLike.L,
    ),
    Consonant(
        "ɭ",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.RETROFLEX,
        True,
        sounds_like=ConsonantSoundsLike.L,
    ),
    Consonant("ʎ", ConsonantType.LATERAL_APPROXIMANT, ConsonantPlace.PALATAL, True),
    Consonant(
        "ʟ",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.VELAR,
        True,
        sounds_like=ConsonantSoundsLike.L,
    ),
]

# Lookup of consonant by IPA text
CONSONANTS = {c.ipa: c for c in _CONSONANTS}
# -----------------------------------------------------------------------------
@dataclass
class Break:
    """IPA break/boundary.

    ``text`` is always derived from ``type`` after construction; any value
    passed for ``text`` is overwritten.

    Raises:
        ValueError: if ``type`` is not a minor, major or word break.
    """

    type: BreakType
    text: str = ""

    def __post_init__(self):
        if self.type == BreakType.MINOR:
            self.text = IPA.BREAK_MINOR
        elif self.type == BreakType.MAJOR:
            self.text = IPA.BREAK_MAJOR
        elif self.type == BreakType.WORD:
            self.text = IPA.BREAK_WORD
        else:
            # Report the offending value (self.type), not the builtin ``type``
            raise ValueError(f"Unrecognized break type: {self.type}")

    @staticmethod
    def from_string(break_str: str) -> "Break":
        """Parse break from string.

        Accepts the minor, major or word break symbol and returns the
        matching Break.

        Raises:
            ValueError: if ``break_str`` is none of those symbols.
        """
        if break_str == IPA.BREAK_MINOR:
            break_type = BreakType.MINOR
        elif break_str == IPA.BREAK_MAJOR:
            break_type = BreakType.MAJOR
        elif break_str == IPA.BREAK_WORD:
            break_type = BreakType.WORD
        else:
            raise ValueError(f"Unrecognized break type: {break_str}")

        return Break(break_type)
class Intonation:
    """Rising or falling IPA intonation marker."""

    def __init__(self, rising: bool):
        self.rising = rising

        # Symbol that corresponds to the direction
        self.text = IPA.INTONATION_RISING if self.rising else IPA.INTONATION_FALLING

    def __repr__(self) -> str:
        return self.text

    @staticmethod
    def from_string(intonation_str: str) -> "Intonation":
        """Build an Intonation from its rising/falling symbol.

        Raises:
            ValueError: if the string is neither intonation symbol.
        """
        if intonation_str == IPA.INTONATION_RISING:
            return Intonation(True)

        if intonation_str == IPA.INTONATION_FALLING:
            return Intonation(False)

        raise ValueError(f"Unrecognized intonation type: {intonation_str}")
# -----------------------------------------------------------------------------
# Placeholder value for "feature not applicable"
FEATURE_EMPTY = "NONE"

# Feature column name -> list of allowed values.
# The order of the columns and of the values in each list is significant:
# FEATURE_KEYS is computed from this mapping in iteration order.
FEATURE_COLUMNS: typing.Dict[str, typing.List[str]] = {
    "symbol_type": ["phoneme", "break"],
    "phoneme_type": [FEATURE_EMPTY, "vowel", "consonant", "schwa"],
    "break_type": [FEATURE_EMPTY, *(member.value for member in BreakType)],
    "diacritic": [FEATURE_EMPTY, "nasalated", "velarized"],
    "vowel_height": [FEATURE_EMPTY, *(member.value for member in VowelHeight)],
    "vowel_place": [FEATURE_EMPTY, *(member.value for member in VowelPlacement)],
    "vowel_rounded": [FEATURE_EMPTY, "rounded", "unrounded"],
    "vowel_stress": [FEATURE_EMPTY, *(member.value for member in Stress)],
    "consonant_voiced": [FEATURE_EMPTY, "voiced", "unvoiced"],
    "consonant_type": [FEATURE_EMPTY, *(member.value for member in ConsonantType)],
    "consonant_place": [FEATURE_EMPTY, *(member.value for member in ConsonantPlace)],
    "consonant_sounds_like": [FEATURE_EMPTY, "r", "l", "g", ""],
    "phoneme_length": [FEATURE_EMPTY, *(member.value for member in PhonemeLength)],
}

# Columns that occupy a single vector position instead of a one-hot slice
FEATURE_ORDINAL_COLUMNS: typing.Set[str] = {
    "vowel_height",
    "vowel_place",
    "vowel_stress",
    "consonant_type",
    "consonant_place",
    "break_type",
    "phoneme_length",
}
def _make_feature_keys() -> typing.Mapping[str, typing.Union[int, slice]]:
    """Map each feature column to its position in the feature vector.

    Columns are laid out in FEATURE_COLUMNS order.  A column listed in
    FEATURE_ORDINAL_COLUMNS takes one position and maps to that index; every
    other column takes one position per allowed value and maps to a slice.
    """
    positions: typing.Dict[str, typing.Union[int, slice]] = {}
    cursor = 0

    for column, values in FEATURE_COLUMNS.items():
        is_ordinal = column in FEATURE_ORDINAL_COLUMNS
        width = 1 if is_ordinal else len(values)

        positions[column] = cursor if is_ordinal else slice(cursor, cursor + width)
        cursor += width

    return positions


FEATURE_KEYS = _make_feature_keys()