ai-content-maker/.venv/Lib/site-packages/gruut_ipa/constants.py

711 lines
23 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Enums, vowels, and consonants for gruut-ipa"""
import typing
import unicodedata
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
# Directory holding this module, and the "data" directory next to it.
_DIR = Path(__file__).parent
_DATA_DIR = _DIR.joinpath("data")

# Language alias -> language identifier it stands for.
LANG_ALIASES = {
    "ar": "ar",
    "cs": "cs-cz",
    "de": "de-de",
    "en": "en-us",
    "es": "es-es",
    "fa": "fa",
    "fr": "fr-fr",
    "it": "it-it",
    "nl": "nl",
    "pt-br": "pt",
    "ru": "ru-ru",
    "sv": "sv-se",
    "sw": "sw",
}
class IPA(str, Enum):
    """International phonetic alphabet characters.

    Every member is also a ``str`` and compares equal to its raw symbol.
    Members that share a value are enum aliases of the first one defined:
    ``TONE_2`` is an alias of ``ACCENT_GRAVE`` and ``BRACKET_PHONEMIC_RIGHT``
    is an alias of ``BRACKET_PHONEMIC_LEFT``.
    """

    STRESS_PRIMARY = "\u02C8"  # ˈ
    STRESS_SECONDARY = "\u02CC"  # ˌ

    ACCENT_ACUTE = "'"
    ACCENT_GRAVE = "²"

    LONG = "\u02D0"  # ː
    HALF_LONG = "\u02D1"  # eˑ
    EXTRA_SHORT = "\u0306"  # ə̆
    NASAL = "\u0303"  # ẽ
    RAISED = "\u031D"  # r̝
    TIE_ABOVE = "\u0361"  # ͡
    TIE_BELOW = "\u035C"  # ͜

    SYLLABIC = "\u0329"
    NON_SYLLABIC = "\u032F"

    BREAK_SYLLABLE = "."
    BREAK_MINOR = "|"
    BREAK_MAJOR = "\u2016"  # ‖
    BREAK_WORD = "#"

    INTONATION_RISING = "\u2197"  # ↗
    INTONATION_FALLING = "\u2198"  # ↘

    # Numbered tones are superscript digits. 4-9 are written as escapes so
    # each tone keeps a distinct value (identical values would collapse into
    # a single enum alias, and an empty value would make is_tone("") true).
    TONE_1 = "¹"
    TONE_2 = "²"
    TONE_3 = "³"
    TONE_4 = "\u2074"  # ⁴
    TONE_5 = "\u2075"  # ⁵
    TONE_6 = "\u2076"  # ⁶
    TONE_7 = "\u2077"  # ⁷
    TONE_8 = "\u2078"  # ⁸
    TONE_9 = "\u2079"  # ⁹

    TONE_EXTRA_HIGH = "˥"
    TONE_HIGH = "˦"
    TONE_MID = "˧"
    TONE_LOW = "˨"
    TONE_EXTRA_LOW = "˩"

    # Not included in is_tone() below.
    TONE_GLOTTALIZED = "ˀ"
    TONE_SHORT = "ʔ"

    BRACKET_PHONETIC_LEFT = "["
    BRACKET_PHONETIC_RIGHT = "]"
    BRACKET_PHONEMIC_LEFT = "/"
    BRACKET_PHONEMIC_RIGHT = "/"
    BRACKET_PROSODIC_LEFT = "{"
    BRACKET_PROSODIC_RIGHT = "}"
    BRACKET_OPTIONAL_LEFT = "("
    BRACKET_OPTIONAL_RIGHT = ")"

    @staticmethod
    def is_long(codepoint: str) -> bool:
        """True if elongated symbol"""
        return codepoint == IPA.LONG

    @staticmethod
    def is_nasal(codepoint: str) -> bool:
        """True if nasalated diacritic"""
        return codepoint == IPA.NASAL

    @staticmethod
    def is_raised(codepoint: str) -> bool:
        """True if raised diacritic"""
        return codepoint == IPA.RAISED

    @staticmethod
    def is_stress(codepoint: str) -> bool:
        """True if primary/secondary stress symbol"""
        return codepoint in (IPA.STRESS_PRIMARY, IPA.STRESS_SECONDARY)

    @staticmethod
    def is_accent(codepoint: str) -> bool:
        """True if accent symbol (acute or grave)"""
        return codepoint in {IPA.ACCENT_ACUTE, IPA.ACCENT_GRAVE}

    @staticmethod
    def is_tie(codepoint: str) -> bool:
        """True if above/below tie symbol"""
        return codepoint in (IPA.TIE_ABOVE, IPA.TIE_BELOW)

    @staticmethod
    def is_bracket(codepoint: str) -> bool:
        """True if any IPA bracket symbol"""
        return codepoint in {
            IPA.BRACKET_PHONETIC_LEFT,
            IPA.BRACKET_PHONETIC_RIGHT,
            IPA.BRACKET_PHONEMIC_LEFT,
            IPA.BRACKET_PHONEMIC_RIGHT,
            IPA.BRACKET_PROSODIC_LEFT,
            IPA.BRACKET_PROSODIC_RIGHT,
            IPA.BRACKET_OPTIONAL_LEFT,
            IPA.BRACKET_OPTIONAL_RIGHT,
        }

    @staticmethod
    def is_break(codepoint: str) -> bool:
        """True if any IPA break symbol (syllable, minor, major or word)"""
        return codepoint in {
            IPA.BREAK_SYLLABLE,
            IPA.BREAK_MINOR,
            IPA.BREAK_MAJOR,
            IPA.BREAK_WORD,
        }

    @staticmethod
    def is_intonation(codepoint: str) -> bool:
        """True if a rising or falling IPA intonation symbol"""
        return codepoint in {IPA.INTONATION_RISING, IPA.INTONATION_FALLING}

    @staticmethod
    def is_tone(codepoint: str) -> bool:
        """True if a numbered tone (1-9) or a tone-level letter.

        TONE_GLOTTALIZED and TONE_SHORT are not treated as tones here.
        """
        return codepoint in {
            IPA.TONE_1,
            IPA.TONE_2,
            IPA.TONE_3,
            IPA.TONE_4,
            IPA.TONE_5,
            IPA.TONE_6,
            IPA.TONE_7,
            IPA.TONE_8,
            IPA.TONE_9,
            IPA.TONE_EXTRA_HIGH,
            IPA.TONE_HIGH,
            IPA.TONE_MID,
            IPA.TONE_LOW,
            IPA.TONE_EXTRA_LOW,
        }

    @staticmethod
    def graphemes(codepoints: str) -> typing.List[str]:
        """Split a string into graphemes.

        The input is decomposed (NFD); every code point with a non-zero
        combining class is attached to the preceding base character, and each
        resulting cluster is returned in composed (NFC) form.
        """
        codepoints = unicodedata.normalize("NFD", codepoints)

        graphemes = []
        grapheme = ""

        for c in codepoints:
            if unicodedata.combining(c) > 0:
                # Combining mark: extend the current cluster
                grapheme += c
                continue

            if grapheme:
                # A new base character closes the previous cluster
                graphemes.append(unicodedata.normalize("NFC", grapheme))

            grapheme = c

        if grapheme:
            # Final grapheme
            graphemes.append(unicodedata.normalize("NFC", grapheme))

        return graphemes

    @staticmethod
    def without_stress(codepoints: str, drop_accent: bool = True) -> str:
        """Return string without primary/secondary stress.

        When ``drop_accent`` is true (the default), accent symbols are
        removed as well.
        """
        return "".join(
            c
            for c in codepoints
            if (not IPA.is_stress(c) and (not drop_accent or not IPA.is_accent(c)))
        )
class Stress(str, Enum):
    """Stress level applied to a phoneme (members are also plain strings)."""

    SECONDARY = "secondary"
    PRIMARY = "primary"
class Accent(str, Enum):
    """Accent applied to a phoneme (members are also plain strings)."""

    ACUTE = "acute"  # written as '
    GRAVE = "grave"  # written as ²
class BreakType(str, Enum):
    """Kind of break/boundary (members are also plain strings)."""

    WORD = "word"  # written as '#'
    MINOR = "minor"  # written as |
    MAJOR = "major"  # written as ‖
class PhonemeLength(str, Enum):
    """How long a phoneme is spoken (members are also plain strings)."""

    SHORT = "short"  # ˑ
    NORMAL = "normal"
    LONG = "long"  # ː
# -----------------------------------------------------------------------------
class VowelHeight(str, Enum):
    """Vowel height, listed from close to open."""

    CLOSE = "close"
    NEAR_CLOSE = "near-close"
    CLOSE_MID = "close-mid"
    MID = "mid"
    OPEN_MID = "open-mid"
    NEAR_OPEN = "near-open"
    OPEN = "open"
class VowelPlacement(str, Enum):
    """Vowel placement, listed from front to back."""

    FRONT = "front"
    NEAR_FRONT = "near-front"
    CENTRAL = "central"
    NEAR_BACK = "near-back"
    BACK = "back"
@dataclass
class Vowel:
    """Description of one vowel symbol.

    The first four fields are required and positional; the remaining ones
    default to a plain, unstressed, normal-length, non-alias vowel.
    """

    # IPA text of the vowel
    ipa: str

    # Position in the vowel chart
    height: VowelHeight
    placement: VowelPlacement

    # Whether the vowel is rounded
    rounded: bool

    # Whether the vowel is nasalated
    nasalated: bool = False

    # Stress applied to the vowel, if any
    stress: typing.Optional[Stress] = None

    # Spoken length
    length: PhonemeLength = PhonemeLength.NORMAL

    # IPA text of the vowel this entry is an alias of, if any
    alias_of: typing.Optional[str] = None
# -----------------------------------------------------------------
# Vowels      Front  Near-Front  Central  Near-Back  Back
# -----------------------------------------------------------------
# Close       i/y                ɨ/ʉ                 ɯ/u
# Near-Close         ɪ/ʏ                  ʊ
# Close-Mid   e/ø                ɘ/ɵ                 ɤ/o
# Mid                            ə
# Open-Mid    ɛ/œ                ɜ/ɞ                 ʌ/ɔ
# Near-Open   æ                  ɐ
# Open        a/ɶ                                    ɑ/ɒ
# -----------------------------------------------------------------
#
# Every entry needs a unique, non-empty ``ipa``: VOWELS is keyed by it, so a
# repeated key silently replaces the earlier entry. Symbols that are easy to
# confuse with (or lose to) look-alike characters are written as escapes.
_VOWELS = [
    Vowel("i", VowelHeight.CLOSE, VowelPlacement.FRONT, False),
    Vowel("y", VowelHeight.CLOSE, VowelPlacement.FRONT, True),
    Vowel("ɨ", VowelHeight.CLOSE, VowelPlacement.CENTRAL, False),
    Vowel(
        "\u1D7B",  # ᵻ
        VowelHeight.CLOSE,
        VowelPlacement.CENTRAL,
        False,
        alias_of="ɨ",
    ),
    Vowel("ʉ", VowelHeight.CLOSE, VowelPlacement.CENTRAL, True),
    Vowel("ɯ", VowelHeight.CLOSE, VowelPlacement.BACK, False),
    Vowel("u", VowelHeight.CLOSE, VowelPlacement.BACK, True),
    #
    Vowel("ɪ", VowelHeight.NEAR_CLOSE, VowelPlacement.NEAR_FRONT, False),
    Vowel("ʏ", VowelHeight.NEAR_CLOSE, VowelPlacement.NEAR_FRONT, True),
    Vowel("ʊ", VowelHeight.NEAR_CLOSE, VowelPlacement.NEAR_BACK, True),
    #
    Vowel("e", VowelHeight.CLOSE_MID, VowelPlacement.FRONT, False),
    Vowel(
        "\u1EBD",  # ẽ (precomposed / NFC form)
        VowelHeight.CLOSE_MID,
        VowelPlacement.FRONT,
        False,
        nasalated=True,
    ),
    Vowel("ø", VowelHeight.CLOSE_MID, VowelPlacement.FRONT, True),
    Vowel("ɘ", VowelHeight.CLOSE_MID, VowelPlacement.CENTRAL, False),
    Vowel("ɵ", VowelHeight.CLOSE_MID, VowelPlacement.CENTRAL, True),
    Vowel("ɤ", VowelHeight.CLOSE_MID, VowelPlacement.BACK, False),
    Vowel("o", VowelHeight.CLOSE_MID, VowelPlacement.BACK, True),
    #
    # Represented as a schwa too
    Vowel("ə", VowelHeight.MID, VowelPlacement.CENTRAL, False),
    #
    Vowel("ɛ", VowelHeight.OPEN_MID, VowelPlacement.FRONT, False),
    Vowel("œ", VowelHeight.OPEN_MID, VowelPlacement.FRONT, True),
    Vowel("ɜ", VowelHeight.OPEN_MID, VowelPlacement.CENTRAL, False),
    Vowel("ɞ", VowelHeight.OPEN_MID, VowelPlacement.CENTRAL, True),
    Vowel("ʌ", VowelHeight.OPEN_MID, VowelPlacement.BACK, False),
    Vowel("ɔ", VowelHeight.OPEN_MID, VowelPlacement.BACK, True),
    Vowel("ɔ̃", VowelHeight.OPEN_MID, VowelPlacement.BACK, True, nasalated=True),
    #
    Vowel("æ", VowelHeight.NEAR_OPEN, VowelPlacement.FRONT, False),
    Vowel("ɐ", VowelHeight.NEAR_OPEN, VowelPlacement.CENTRAL, False),
    #
    Vowel("a", VowelHeight.OPEN, VowelPlacement.FRONT, False),
    Vowel(
        "\u00E3",  # ã (precomposed / NFC form)
        VowelHeight.OPEN,
        VowelPlacement.FRONT,
        False,
        nasalated=True,
    ),
    Vowel("ɶ", VowelHeight.OPEN, VowelPlacement.FRONT, True),
    Vowel("ɑ", VowelHeight.OPEN, VowelPlacement.BACK, False),
    Vowel("ɒ", VowelHeight.OPEN, VowelPlacement.BACK, True),
]

# IPA text -> Vowel
VOWELS = {v.ipa: v for v in _VOWELS}
# -----------------------------------------------------------------------------
@dataclass
class Dipthong:
    """Pair of vowels treated as a single unit."""

    # First vowel of the pair
    vowel1: Vowel

    # Second vowel of the pair
    vowel2: Vowel
# -----------------------------------------------------------------------------
@dataclass
class Schwa:
    """Description of one vowel-like (schwa) symbol."""

    # IPA text of the schwa
    ipa: str

    # Whether the schwa is r-coloured
    r_coloured: bool

    # Spoken length
    length: PhonemeLength = PhonemeLength.NORMAL

    # IPA text of the schwa this entry is an alias of, if any
    alias_of: typing.Optional[str] = None
_SCHWAS = [
    Schwa("ə", False),
    Schwa("ɚ", True),
    Schwa("ɝ", True, alias_of="ɚ"),
]

# IPA text -> Schwa
SCHWAS = {schwa.ipa: schwa for schwa in _SCHWAS}
# -----------------------------------------------------------------------------
class ConsonantType(str, Enum):
    """Consonant type (members are also plain strings)."""

    NASAL = "nasal"
    PLOSIVE = "plosive"
    AFFRICATE = "affricate"
    FRICATIVE = "fricative"
    APPROXIMANT = "approximant"
    FLAP = "flap"
    TRILL = "trill"
    LATERAL_APPROXIMANT = "lateral-approximant"
class ConsonantPlace(str, Enum):
    """Where a consonant is articulated (members are also plain strings)."""

    BILABIAL = "bilabial"
    LABIO_DENTAL = "labio-dental"
    DENTAL = "dental"
    ALVEOLAR = "alveolar"
    POST_ALVEOLAR = "post-alveolar"
    RETROFLEX = "retroflex"
    PALATAL = "palatal"
    VELAR = "velar"
    UVULAR = "uvular"
    PHARYNGEAL = "pharyngeal"
    GLOTTAL = "glottal"
class ConsonantSoundsLike(str, Enum):
    """Sound class a consonant resembles; ``NONE`` is the empty string."""

    NONE = ""
    R = "r"
    G = "g"
    L = "l"
@dataclass
class Consonant:
    """Description of one consonant symbol.

    The first four fields are required and positional; the remaining ones
    default to a non-velarized, normal-length, non-alias consonant that
    resembles no particular sound class.
    """

    # IPA text of the consonant
    ipa: str

    # Type and place of articulation
    type: ConsonantType
    place: ConsonantPlace

    # Whether the consonant is voiced
    voiced: bool

    # Whether the consonant is velarized
    velarized: bool = False

    # Class of sounds this consonant is similar to
    sounds_like: ConsonantSoundsLike = ConsonantSoundsLike.NONE

    # Spoken length
    length: PhonemeLength = PhonemeLength.NORMAL

    # IPA text of the consonant this entry is an alias of, if any
    alias_of: typing.Optional[str] = None
# -----------------------------------------------------------------------------------------------------------------------------
# Type         Bilabial  Labiodental  Dental    Alveolar  Postalveolar  Retroflex  Palatal   Velar  Uvular  Pharyngeal  Glottal
# -----------------------------------------------------------------------------------------------------------------------------
# Nasal        m         ɱ                      n                       ɳ          ɲ         ŋ      ɴ
# Plosive      p/b                              t/d                     ʈ/ɖ        c/ɟ       k/ɡ    q/ɢ     ʡ           ʔ
# Affricate              p͡f/b͡v      t̪͡s/d̪͡z  t͡s/d͡z   t͡ʃ/d͡ʒ       ʈ͡ʂ/ɖ͡ʐ    t͡ɕ/d͡ʑ   k͡x
# Fricative    ɸ/β       f/v          θ/ð       s/z       ʃ/ʒ           ʂ/ʐ        ç/ʝ, ʑ    x/ɣ    χ/ʁ     ħ           h/ɦ
# Approximant  w         ʋ                      ɹ                       ɻ          j         ɰ
# Flap                   ⱱ                      ɾ                       ɽ
# Trill        ʙ                                r                                                   ʀ
# Lateral App                                   l/ɫ                     ɭ          ʎ         ʟ
# -----------------------------------------------------------------------------------------------------------------------------
#
# Every entry needs a unique, non-empty ``ipa``: CONSONANTS is keyed by it, so
# a repeated key silently replaces the earlier entry. Symbols that are easy to
# confuse with (or lose to) look-alike characters are written as escapes.
_CONSONANTS = [
    Consonant("m", ConsonantType.NASAL, ConsonantPlace.BILABIAL, True),
    Consonant("ɱ", ConsonantType.NASAL, ConsonantPlace.LABIO_DENTAL, True),
    Consonant("n", ConsonantType.NASAL, ConsonantPlace.ALVEOLAR, True),
    Consonant("ɳ", ConsonantType.NASAL, ConsonantPlace.RETROFLEX, True),
    Consonant("ɲ", ConsonantType.NASAL, ConsonantPlace.PALATAL, True),
    Consonant("ŋ", ConsonantType.NASAL, ConsonantPlace.VELAR, True),
    Consonant("ɴ", ConsonantType.NASAL, ConsonantPlace.UVULAR, True),
    #
    Consonant("p", ConsonantType.PLOSIVE, ConsonantPlace.BILABIAL, False),
    Consonant("b", ConsonantType.PLOSIVE, ConsonantPlace.BILABIAL, True),
    Consonant("t", ConsonantType.PLOSIVE, ConsonantPlace.ALVEOLAR, False),
    Consonant("d", ConsonantType.PLOSIVE, ConsonantPlace.ALVEOLAR, True),
    Consonant("ʈ", ConsonantType.PLOSIVE, ConsonantPlace.RETROFLEX, False),
    Consonant("ɖ", ConsonantType.PLOSIVE, ConsonantPlace.RETROFLEX, True),
    Consonant("c", ConsonantType.PLOSIVE, ConsonantPlace.PALATAL, False),
    Consonant("ɟ", ConsonantType.PLOSIVE, ConsonantPlace.PALATAL, True),
    Consonant("k", ConsonantType.PLOSIVE, ConsonantPlace.VELAR, False),
    Consonant(
        "\u0261",  # ɡ (IPA script g)
        ConsonantType.PLOSIVE,
        ConsonantPlace.VELAR,
        True,
        sounds_like=ConsonantSoundsLike.G,
    ),
    Consonant(
        "g",  # ASCII g, accepted as an alias of the IPA script g
        ConsonantType.PLOSIVE,
        ConsonantPlace.VELAR,
        True,
        sounds_like=ConsonantSoundsLike.G,
        alias_of="\u0261",
    ),
    Consonant(
        "q",
        ConsonantType.PLOSIVE,
        ConsonantPlace.UVULAR,
        False,
        sounds_like=ConsonantSoundsLike.G,
    ),
    Consonant(
        "ɢ",
        ConsonantType.PLOSIVE,
        ConsonantPlace.UVULAR,
        True,
        sounds_like=ConsonantSoundsLike.G,
    ),
    Consonant("ʡ", ConsonantType.PLOSIVE, ConsonantPlace.PHARYNGEAL, False),
    Consonant("ʔ", ConsonantType.PLOSIVE, ConsonantPlace.GLOTTAL, False),
    #
    Consonant("p͡f", ConsonantType.AFFRICATE, ConsonantPlace.LABIO_DENTAL, False),
    Consonant("b͡v", ConsonantType.AFFRICATE, ConsonantPlace.LABIO_DENTAL, True),
    Consonant("t̪͡s", ConsonantType.AFFRICATE, ConsonantPlace.DENTAL, False),
    # Voiced partner of "t̪͡s". This entry used to repeat "b͡v", which replaced
    # the labio-dental entry above in CONSONANTS.
    Consonant("d̪͡z", ConsonantType.AFFRICATE, ConsonantPlace.DENTAL, True),
    Consonant("t͡s", ConsonantType.AFFRICATE, ConsonantPlace.ALVEOLAR, False),
    Consonant("d͡z", ConsonantType.AFFRICATE, ConsonantPlace.ALVEOLAR, True),
    Consonant("t͡ʃ", ConsonantType.AFFRICATE, ConsonantPlace.POST_ALVEOLAR, False),
    Consonant("d͡ʒ", ConsonantType.AFFRICATE, ConsonantPlace.POST_ALVEOLAR, True),
    Consonant("ʈ͡ʂ", ConsonantType.AFFRICATE, ConsonantPlace.RETROFLEX, False),
    Consonant("ɖ͡ʐ", ConsonantType.AFFRICATE, ConsonantPlace.RETROFLEX, True),
    Consonant("t͡ɕ", ConsonantType.AFFRICATE, ConsonantPlace.PALATAL, False),
    Consonant("d͡ʑ", ConsonantType.AFFRICATE, ConsonantPlace.PALATAL, True),
    Consonant("k͡x", ConsonantType.AFFRICATE, ConsonantPlace.VELAR, False),
    #
    Consonant("ɸ", ConsonantType.FRICATIVE, ConsonantPlace.BILABIAL, False),
    Consonant("β", ConsonantType.FRICATIVE, ConsonantPlace.BILABIAL, True),
    Consonant("f", ConsonantType.FRICATIVE, ConsonantPlace.LABIO_DENTAL, False),
    Consonant("v", ConsonantType.FRICATIVE, ConsonantPlace.LABIO_DENTAL, True),
    Consonant("θ", ConsonantType.FRICATIVE, ConsonantPlace.DENTAL, False),
    Consonant("ð", ConsonantType.FRICATIVE, ConsonantPlace.DENTAL, True),
    Consonant("s", ConsonantType.FRICATIVE, ConsonantPlace.ALVEOLAR, False),
    Consonant("z", ConsonantType.FRICATIVE, ConsonantPlace.ALVEOLAR, True),
    Consonant("ʃ", ConsonantType.FRICATIVE, ConsonantPlace.POST_ALVEOLAR, False),
    Consonant("ʒ", ConsonantType.FRICATIVE, ConsonantPlace.POST_ALVEOLAR, True),
    Consonant("ʂ", ConsonantType.FRICATIVE, ConsonantPlace.RETROFLEX, False),
    Consonant("ʐ", ConsonantType.FRICATIVE, ConsonantPlace.RETROFLEX, True),
    Consonant("ç", ConsonantType.FRICATIVE, ConsonantPlace.PALATAL, False),
    Consonant(
        "ʝ", ConsonantType.FRICATIVE, ConsonantPlace.PALATAL, False, alias_of="ç"
    ),
    # NOTE(review): this entry used to repeat the retroflex "ʐ", which replaced
    # the retroflex entry above in CONSONANTS. Presumably the look-alike "ʑ"
    # was intended (its affricate "d͡ʑ" is listed as palatal) - confirm.
    Consonant("\u0291", ConsonantType.FRICATIVE, ConsonantPlace.PALATAL, True),  # ʑ
    Consonant("x", ConsonantType.FRICATIVE, ConsonantPlace.VELAR, False),
    Consonant("ɣ", ConsonantType.FRICATIVE, ConsonantPlace.VELAR, True),
    Consonant("χ", ConsonantType.FRICATIVE, ConsonantPlace.UVULAR, False),
    Consonant(
        "ʁ",
        ConsonantType.FRICATIVE,
        ConsonantPlace.UVULAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant("ħ", ConsonantType.FRICATIVE, ConsonantPlace.PHARYNGEAL, False),
    Consonant("h", ConsonantType.FRICATIVE, ConsonantPlace.GLOTTAL, False),
    Consonant("ɦ", ConsonantType.FRICATIVE, ConsonantPlace.GLOTTAL, True),
    #
    Consonant("w", ConsonantType.APPROXIMANT, ConsonantPlace.BILABIAL, True),
    Consonant("ʋ", ConsonantType.APPROXIMANT, ConsonantPlace.LABIO_DENTAL, True),
    Consonant(
        "ɹ",
        ConsonantType.APPROXIMANT,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant(
        "ɻ",
        ConsonantType.APPROXIMANT,
        ConsonantPlace.RETROFLEX,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant("j", ConsonantType.APPROXIMANT, ConsonantPlace.PALATAL, True),
    Consonant("ɰ", ConsonantType.APPROXIMANT, ConsonantPlace.VELAR, True),
    #
    Consonant("\u2C71", ConsonantType.FLAP, ConsonantPlace.LABIO_DENTAL, True),  # ⱱ
    Consonant(
        "ɾ",
        ConsonantType.FLAP,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant(
        "ɽ",
        ConsonantType.FLAP,
        ConsonantPlace.RETROFLEX,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    #
    Consonant("ʙ", ConsonantType.TRILL, ConsonantPlace.BILABIAL, True),
    Consonant(
        "r",
        ConsonantType.TRILL,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    Consonant(
        "ʀ",
        ConsonantType.TRILL,
        ConsonantPlace.UVULAR,
        True,
        sounds_like=ConsonantSoundsLike.R,
    ),
    #
    Consonant(
        "l",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.ALVEOLAR,
        True,
        sounds_like=ConsonantSoundsLike.L,
    ),
    Consonant(
        "ɫ",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.ALVEOLAR,
        True,
        velarized=True,
        sounds_like=ConsonantSoundsLike.L,
    ),
    Consonant(
        "ɭ",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.RETROFLEX,
        True,
        sounds_like=ConsonantSoundsLike.L,
    ),
    Consonant("ʎ", ConsonantType.LATERAL_APPROXIMANT, ConsonantPlace.PALATAL, True),
    Consonant(
        "ʟ",
        ConsonantType.LATERAL_APPROXIMANT,
        ConsonantPlace.VELAR,
        True,
        sounds_like=ConsonantSoundsLike.L,
    ),
]

# IPA text -> Consonant
CONSONANTS = {c.ipa: c for c in _CONSONANTS}
# -----------------------------------------------------------------------------
@dataclass
class Break:
    """IPA break/boundary.

    Attributes:
        type: Kind of break (word, minor or major).
        text: IPA symbol for the break. It is always overwritten from
            ``type`` in ``__post_init__``, so a value passed in is ignored.
    """

    type: BreakType
    text: str = ""

    def __post_init__(self):
        """Set ``text`` to the IPA symbol matching ``type``.

        Raises:
            ValueError: If ``type`` is not a recognized break type.
        """
        if self.type == BreakType.MINOR:
            self.text = IPA.BREAK_MINOR
        elif self.type == BreakType.MAJOR:
            self.text = IPA.BREAK_MAJOR
        elif self.type == BreakType.WORD:
            self.text = IPA.BREAK_WORD
        else:
            # Report the offending value (not the ``type`` builtin)
            raise ValueError(f"Unrecognized break type: {self.type}")

    @staticmethod
    def from_string(break_str: str) -> "Break":
        """Parse break from string.

        Args:
            break_str: One of the minor, major or word break symbols.

        Returns:
            The corresponding ``Break``.

        Raises:
            ValueError: If ``break_str`` is none of those symbols.
        """
        if break_str == IPA.BREAK_MINOR:
            break_type = BreakType.MINOR
        elif break_str == IPA.BREAK_MAJOR:
            break_type = BreakType.MAJOR
        elif break_str == IPA.BREAK_WORD:
            break_type = BreakType.WORD
        else:
            raise ValueError(f"Unrecognized break type: {break_str}")

        return Break(break_type)
class Intonation:
    """Rising or falling IPA intonation mark."""

    def __init__(self, rising: bool):
        # ``text`` holds the IPA symbol matching the direction
        self.rising = rising
        self.text = IPA.INTONATION_RISING if self.rising else IPA.INTONATION_FALLING

    def __repr__(self) -> str:
        return self.text

    @staticmethod
    def from_string(intonation_str: str) -> "Intonation":
        """Build an Intonation from its IPA symbol.

        Raises ValueError when the string is neither the rising nor the
        falling intonation symbol.
        """
        if intonation_str == IPA.INTONATION_RISING:
            return Intonation(True)

        if intonation_str == IPA.INTONATION_FALLING:
            return Intonation(False)

        raise ValueError(f"Unrecognized intonation type: {intonation_str}")
# -----------------------------------------------------------------------------
# Placeholder value that comes first in most feature columns
FEATURE_EMPTY = "NONE"

# Feature column name -> possible values, in order.
# The insertion order of the columns matters: _make_feature_keys() assigns
# vector offsets by walking this dict from top to bottom.
FEATURE_COLUMNS: typing.Dict[str, typing.List[str]] = {
    "symbol_type": ["phoneme", "break"],
    "phoneme_type": [FEATURE_EMPTY, "vowel", "consonant", "schwa"],
    "break_type": [FEATURE_EMPTY, *(member.value for member in BreakType)],
    "diacritic": [FEATURE_EMPTY, "nasalated", "velarized"],
    "vowel_height": [FEATURE_EMPTY, *(member.value for member in VowelHeight)],
    "vowel_place": [FEATURE_EMPTY, *(member.value for member in VowelPlacement)],
    "vowel_rounded": [FEATURE_EMPTY, "rounded", "unrounded"],
    "vowel_stress": [FEATURE_EMPTY, *(member.value for member in Stress)],
    "consonant_voiced": [FEATURE_EMPTY, "voiced", "unvoiced"],
    "consonant_type": [FEATURE_EMPTY, *(member.value for member in ConsonantType)],
    "consonant_place": [FEATURE_EMPTY, *(member.value for member in ConsonantPlace)],
    "consonant_sounds_like": [FEATURE_EMPTY, "r", "l", "g", ""],
    "phoneme_length": [FEATURE_EMPTY, *(member.value for member in PhonemeLength)],
}

# Columns that take a single vector position (ordinal) instead of a one-hot
# slice when _make_feature_keys() lays out the feature vector.
FEATURE_ORDINAL_COLUMNS: typing.Set[str] = {
    "vowel_height",
    "vowel_place",
    "vowel_stress",
    "consonant_type",
    "consonant_place",
    "break_type",
    "phoneme_length",
}
def _make_feature_keys() -> typing.Mapping[str, typing.Union[int, slice]]:
    """Lay out the feature vector.

    Walks FEATURE_COLUMNS in order. An ordinal column (one named in
    FEATURE_ORDINAL_COLUMNS) gets a single index and takes one position;
    any other column gets a slice as wide as its list of values.

    Returns:
        Mapping from feature column name to its index or slice.
    """
    layout: typing.Dict[str, typing.Union[int, slice]] = {}
    position = 0

    for column, values in FEATURE_COLUMNS.items():
        if column in FEATURE_ORDINAL_COLUMNS:
            width = 1
            layout[column] = position
        else:
            width = len(values)
            layout[column] = slice(position, position + width)

        position += width

    return layout


FEATURE_KEYS = _make_feature_keys()