ai-content-maker/.venv/Lib/site-packages/gruut_ipa/__main__.py

394 lines
12 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
#!/usr/bin/env python3
"""Command-line interface to gruut-ipa"""
import argparse
import itertools
import json
import logging
import os
import sys
import typing
from pathlib import Path
# -----------------------------------------------------------------------------
_LOGGER = logging.getLogger("gruut_ipa")
_DIR = Path(__file__).parent
_DATA_DIR = _DIR / "data"
# -----------------------------------------------------------------------------
def main():
"""Main entry point"""
args = get_args()
if args.debug:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
_LOGGER.debug(args)
# Dispatch to sub-command
args.func(args)
# -----------------------------------------------------------------------------
def do_print(args):
"""Print known IPA phones"""
from gruut_ipa import CONSONANTS, SCHWAS, VOWELS, Phoneme, Phonemes
from gruut_ipa.espeak import ipa_to_espeak
from gruut_ipa.sampa import ipa_to_sampa
allowed_phonemes: typing.Set[str] = set()
if args.language:
# Load phonemes using language code
phonemes_path = _DATA_DIR / args.language / "phonemes.txt"
_LOGGER.debug("Loading phonemes from %s", phonemes_path)
with open(phonemes_path, "r", encoding="utf-8") as phonemes_file:
phonemes = Phonemes.from_text(phonemes_file)
allowed_phonemes.update(p.text for p in phonemes)
for phone_str in sorted(itertools.chain(VOWELS, CONSONANTS, SCHWAS)):
phone = Phoneme(phone_str)
if allowed_phonemes and (phone.text not in allowed_phonemes):
# Skip phoneme outside language
continue
description = ""
if phone.vowel:
v = phone.vowel
description = (
v.height.value
+ " "
+ v.placement.value
+ " "
+ ("rounded" if v.rounded else "unrounded")
+ " vowel"
)
elif phone.consonant:
c = phone.consonant
description = (
("voiced" if c.voiced else "voiceless")
+ " "
+ c.place.value
+ " "
+ c.type.value
)
elif phone.schwa:
s = phone.schwa
if s.r_coloured:
description = "r-coloured schwa"
else:
description = "schwa"
phone_dict = phone.to_dict()
phone_dict["description"] = description
# Add espeak/sampa
phone_dict["espeak"] = ipa_to_espeak(phone_str)
phone_dict["sampa"] = ipa_to_sampa(phone_str)
phone_dict_str = json.dumps(phone_dict, ensure_ascii=False)
print(phone_dict_str)
# -----------------------------------------------------------------------------
def do_describe(args):
"""Describe IPA phones"""
from gruut_ipa import Phoneme
if args.phone:
# From arguments
phones = args.phone
else:
# From stdin
phones = sys.stdin
if os.isatty(sys.stdin.fileno()):
print("Reading phones from stdin...", file=sys.stderr)
for line in phones:
line = line.strip()
if line:
line_phone = Phoneme(text=line)
phone_str = json.dumps(line_phone.to_dict(), ensure_ascii=False)
print(phone_str)
sys.stdout.flush()
# -----------------------------------------------------------------------------
def do_phones(args):
"""Group phones in IPA pronunciation"""
from gruut_ipa import Pronunciation
if args.pronunciation:
# From arguments
pronunciations = args.pronunciation
else:
# From stdin
pronunciations = sys.stdin
if os.isatty(sys.stdin.fileno()):
print("Reading pronunciations from stdin...", file=sys.stderr)
for line in pronunciations:
line = line.strip()
if line:
line_pron = Pronunciation.from_string(line)
phones_str = args.separator.join(p.text for p in line_pron if p.text)
print(phones_str)
sys.stdout.flush()
# -----------------------------------------------------------------------------
def do_phonemes(args):
"""Group phones in IPA pronuncation according to language phonemes"""
from gruut_ipa import Phonemes
if args.pronunciation:
# From arguments
pronunciations = args.pronunciation
else:
# From stdin
pronunciations = sys.stdin
if os.isatty(sys.stdin.fileno()):
print("Reading pronunciations from stdin...", file=sys.stderr)
if args.phonemes_file:
# Load phonemes from file
phonemes_path = Path(args.phonemes_file)
else:
# Load phonemes using language code
phonemes_path = _DATA_DIR / args.language / "phonemes.txt"
# Check language support
if not phonemes_path.is_file():
supported_languages = [d.name for d in _DATA_DIR.iterdir() if d.is_dir()]
_LOGGER.fatal("Unsupported language: %s", args.language)
_LOGGER.fatal("Supported languages: %s", supported_languages)
sys.exit(1)
_LOGGER.debug("Loading phonemes from %s", phonemes_path)
with open(phonemes_path, "r", encoding="utf-8") as phonemes_file:
phonemes = Phonemes.from_text(phonemes_file)
for line in pronunciations:
line = line.strip()
if line:
line_phonemes = phonemes.split(
line, keep_stress=args.keep_stress, drop_tones=args.drop_tones
)
phonemes_str = args.separator.join(p.text for p in line_phonemes if p.text)
print(phonemes_str)
sys.stdout.flush()
# -----------------------------------------------------------------------------
def do_convert(args):
"""Convert pronunciations between different representations"""
from gruut_ipa import Phoneme, Phonemes
from gruut_ipa.espeak import espeak_to_ipa, ipa_to_espeak
from gruut_ipa.sampa import ipa_to_sampa, sampa_to_ipa
fixed_src_dest = {"ipa", "espeak", "sampa"}
src_phonemes: typing.Optional[Phonemes] = None
dest_phonemes: typing.Optional[Phonemes] = None
if args.src not in fixed_src_dest:
src_phonemes = Phonemes.from_language(args.src)
if args.dest not in fixed_src_dest:
dest_phoneme_map = Phonemes.from_language(args.dest).gruut_ipa_map
# ipa -> original phoneme
dest_phonemes = Phonemes()
for k, v in dest_phoneme_map.items():
if v in dest_phonemes.gruut_ipa_map:
continue
dest_phonemes.phonemes.append(Phoneme(text=k, is_ipa=False))
dest_phonemes.ipa_map[v] = k
dest_phonemes.update()
if args.pronunciation:
# From arguments
pronunciations = args.pronunciation
else:
# From stdin
pronunciations = sys.stdin
if os.isatty(sys.stdin.fileno()):
print("Reading pronunciations from stdin...", file=sys.stderr)
for line in pronunciations:
line = line.strip()
if line:
if args.src == "ipa":
src_ipa = line
elif args.src == "espeak":
src_ipa = espeak_to_ipa(line)
elif args.src == "sampa":
src_ipa = sampa_to_ipa(line)
else:
assert src_phonemes is not None
src_ipa = args.separator.join(
src_phonemes.gruut_ipa_map.get(p.text, p.text)
for p in src_phonemes.split(line)
)
if args.dest == "ipa":
dest_pron = src_ipa
elif args.dest == "espeak":
dest_pron = "[[" + ipa_to_espeak(src_ipa) + "]]"
elif args.dest == "sampa":
dest_pron = ipa_to_sampa(src_ipa)
else:
assert dest_phonemes is not None
dest_pron = args.separator.join(
p.text for p in dest_phonemes.split(src_ipa, is_ipa=False)
)
print(dest_pron)
sys.stdout.flush()
# -----------------------------------------------------------------------------
def get_args() -> argparse.Namespace:
"""Parse command-line arguments"""
parser = argparse.ArgumentParser(prog="gruut_ipa")
# Create subparsers for each sub-command
sub_parsers = parser.add_subparsers()
sub_parsers.required = True
sub_parsers.dest = "command"
# -----
# print
# -----
print_parser = sub_parsers.add_parser("print", help="Print all known IPA phones")
print_parser.add_argument(
"--language", help="Only print phones from a specific language or language/set"
)
print_parser.set_defaults(func=do_print)
# --------
# describe
# --------
describe_parser = sub_parsers.add_parser("describe", help="Describe IPA phone(s)")
describe_parser.set_defaults(func=do_describe)
describe_parser.add_argument(
"phone", nargs="*", help="IPA phones (read from stdin if not provided)"
)
# --------
# phones
# --------
phones_parser = sub_parsers.add_parser(
"phones", help="Group phones in IPA pronunciation"
)
phones_parser.set_defaults(func=do_phones)
phones_parser.add_argument(
"pronunciation",
nargs="*",
help="IPA pronunciations (read from stdin if not provided)",
)
phones_parser.add_argument(
"--separator",
default=" ",
help="Separator to add between phones in output (default: space)",
)
# --------
# phonemes
# --------
phonemes_parser = sub_parsers.add_parser(
"phonemes",
help="Group phones in IPA pronunciation according to language phonemes",
)
phonemes_parser.set_defaults(func=do_phonemes)
phonemes_parser.add_argument("language", help="Language code (e.g., en-us)")
phonemes_parser.add_argument(
"pronunciation",
nargs="*",
help="IPA pronunciations (read from stdin if not provided)",
)
phonemes_parser.add_argument(
"--separator",
default=" ",
help="Separator to add between phonemes in output (default: space)",
)
phonemes_parser.add_argument(
"--keep-stress",
action="store_true",
help="Keep primary/secondary stress markers",
)
phonemes_parser.add_argument(
"--drop-tones", action="store_true", help="Remove tone numbers/letters"
)
phonemes_parser.add_argument(
"--phonemes-file", help="Load phonemes from file instead of using language code"
)
# -------
# convert
# -------
convert_parser = sub_parsers.add_parser(
"convert", help="Convert pronunciations between ipa, espeak, and sampa"
)
convert_parser.set_defaults(func=do_convert)
convert_parser.add_argument(
"src", help="Source format (language, language/set, ipa, espeak, sampa)"
)
convert_parser.add_argument(
"dest", help="Destination format (language, language/set, ipa, espeak, sampa)"
)
convert_parser.add_argument(
"pronunciation",
nargs="*",
help="Pronunciations (read from stdin if not provided)",
)
convert_parser.add_argument(
"--separator", default=" ", help="Separator between phonemes (default: space)"
)
# Shared arguments
for sub_parser in [
print_parser,
describe_parser,
phones_parser,
phonemes_parser,
convert_parser,
]:
sub_parser.add_argument(
"--debug", action="store_true", help="Print DEBUG messages to console"
)
return parser.parse_args()
# -----------------------------------------------------------------------------
if __name__ == "__main__":
main()