#!/usr/bin/env python3
"""Command-line interface to gruut-ipa"""
import argparse
import itertools
import json
import logging
import os
import sys
import typing
from pathlib import Path

# -----------------------------------------------------------------------------

_LOGGER = logging.getLogger("gruut_ipa")

_DIR = Path(__file__).parent
_DATA_DIR = _DIR / "data"

# -----------------------------------------------------------------------------


def main():
    """Main entry point: parse arguments, set up logging, run the sub-command."""
    args = get_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    _LOGGER.debug(args)

    # Dispatch to sub-command (each sub-parser stores its handler in "func")
    args.func(args)


# -----------------------------------------------------------------------------


def _input_lines(
    values: typing.Optional[typing.Sequence[str]], what: str
) -> typing.Iterator[str]:
    """Return the stripped, non-empty input lines for a sub-command.

    Lines come from ``values`` when it is non-empty, otherwise from stdin.
    When stdin is a terminal, a hint naming ``what`` is printed to stderr
    immediately (before any line is read).  The returned iterator is lazy, so
    stdin is consumed line by line.
    """
    source: typing.Iterable[str]
    if values:
        # From arguments
        source = values
    else:
        # From stdin
        source = sys.stdin
        if os.isatty(sys.stdin.fileno()):
            print("Reading {} from stdin...".format(what), file=sys.stderr)

    return (line for line in map(str.strip, source) if line)


def _resolve_phonemes_path(
    language: typing.Optional[str], phonemes_file: typing.Optional[str] = None
) -> Path:
    """Return the path of the phonemes file to load, or exit with status 1.

    An explicit ``phonemes_file`` takes precedence over ``language``.  If it is
    not given, the path is ``<data dir>/<language>/phonemes.txt``.  A missing
    file is logged and terminates the program via ``sys.exit(1)``.
    """
    if phonemes_file:
        # Load phonemes from file
        phonemes_path = Path(phonemes_file)
        if not phonemes_path.is_file():
            # Report the real problem instead of blaming the language code
            _LOGGER.fatal("Phonemes file does not exist: %s", phonemes_path)
            sys.exit(1)

        return phonemes_path

    # Load phonemes using language code
    phonemes_path = _DATA_DIR / str(language) / "phonemes.txt"

    # Check language support
    if not phonemes_path.is_file():
        supported_languages: typing.List[str] = []
        if _DATA_DIR.is_dir():
            # Guarded: iterdir() raises when the data directory is missing
            supported_languages = sorted(
                d.name for d in _DATA_DIR.iterdir() if d.is_dir()
            )

        _LOGGER.fatal("Unsupported language: %s", language)
        _LOGGER.fatal("Supported languages: %s", supported_languages)
        sys.exit(1)

    return phonemes_path


def _describe_phone(phone) -> str:
    """Build a short English description of a phone (empty if unclassified)."""
    if phone.vowel:
        vowel = phone.vowel
        return (
            vowel.height.value
            + " "
            + vowel.placement.value
            + " "
            + ("rounded" if vowel.rounded else "unrounded")
            + " vowel"
        )

    if phone.consonant:
        consonant = phone.consonant
        return (
            ("voiced" if consonant.voiced else "voiceless")
            + " "
            + consonant.place.value
            + " "
            + consonant.type.value
        )

    if phone.schwa:
        return "r-coloured schwa" if phone.schwa.r_coloured else "schwa"

    return ""


# -----------------------------------------------------------------------------


def do_print(args):
    """Print known IPA phones as JSON objects, one per line.

    With ``args.language`` set, only phones listed in that language's phonemes
    file are printed.  An unknown language is logged and exits with status 1.
    """
    phonemes_path: typing.Optional[Path] = None
    if args.language:
        # Validate before importing the library so bad input fails fast
        phonemes_path = _resolve_phonemes_path(args.language)

    from gruut_ipa import CONSONANTS, SCHWAS, VOWELS, Phoneme, Phonemes
    from gruut_ipa.espeak import ipa_to_espeak
    from gruut_ipa.sampa import ipa_to_sampa

    allowed_phonemes: typing.Set[str] = set()

    if phonemes_path is not None:
        _LOGGER.debug("Loading phonemes from %s", phonemes_path)
        with open(phonemes_path, "r", encoding="utf-8") as phonemes_file:
            phonemes = Phonemes.from_text(phonemes_file)
            allowed_phonemes.update(p.text for p in phonemes)

    for phone_str in sorted(itertools.chain(VOWELS, CONSONANTS, SCHWAS)):
        phone = Phoneme(phone_str)
        if allowed_phonemes and (phone.text not in allowed_phonemes):
            # Skip phoneme outside language
            continue

        phone_dict = phone.to_dict()
        phone_dict["description"] = _describe_phone(phone)

        # Add espeak/sampa
        phone_dict["espeak"] = ipa_to_espeak(phone_str)
        phone_dict["sampa"] = ipa_to_sampa(phone_str)

        phone_dict_str = json.dumps(phone_dict, ensure_ascii=False)
        print(phone_dict_str)


# -----------------------------------------------------------------------------


def do_describe(args):
    """Describe IPA phones: print one JSON object per input phone.

    Phones are taken from ``args.phone`` or, if that is empty, from stdin.
    """
    from gruut_ipa import Phoneme

    for line in _input_lines(args.phone, "phones"):
        line_phone = Phoneme(text=line)
        phone_str = json.dumps(line_phone.to_dict(), ensure_ascii=False)
        print(phone_str)

        # Flush per line so output is usable in interactive pipelines
        sys.stdout.flush()


# -----------------------------------------------------------------------------


def do_phones(args):
    """Group phones in IPA pronunciation.

    Each pronunciation (from ``args.pronunciation`` or stdin) is split into
    phones, which are printed joined by ``args.separator``.
    """
    from gruut_ipa import Pronunciation

    for line in _input_lines(args.pronunciation, "pronunciations"):
        line_pron = Pronunciation.from_string(line)
        phones_str = args.separator.join(p.text for p in line_pron if p.text)
        print(phones_str)
        sys.stdout.flush()


# -----------------------------------------------------------------------------


def do_phonemes(args):
    """Group phones in IPA pronunciation according to language phonemes.

    The phoneme inventory comes from ``args.phonemes_file`` if given, otherwise
    from the data directory entry for ``args.language``.  A missing file or an
    unsupported language is logged and exits with status 1.
    """
    # Validate before importing the library so bad input fails fast
    phonemes_path = _resolve_phonemes_path(args.language, args.phonemes_file)

    from gruut_ipa import Phonemes

    pronunciations = _input_lines(args.pronunciation, "pronunciations")

    _LOGGER.debug("Loading phonemes from %s", phonemes_path)
    with open(phonemes_path, "r", encoding="utf-8") as phonemes_file:
        phonemes = Phonemes.from_text(phonemes_file)

    for line in pronunciations:
        line_phonemes = phonemes.split(
            line, keep_stress=args.keep_stress, drop_tones=args.drop_tones
        )
        phonemes_str = args.separator.join(p.text for p in line_phonemes if p.text)
        print(phonemes_str)
        sys.stdout.flush()


# -----------------------------------------------------------------------------


def do_convert(args):
    """Convert pronunciations between different representations.

    ``args.src`` and ``args.dest`` are each one of "ipa", "espeak", "sampa", or
    a language name whose phoneme set is loaded with ``Phonemes.from_language``.
    Every input is first converted to IPA and then to the destination format.
    """
    from gruut_ipa import Phoneme, Phonemes
    from gruut_ipa.espeak import espeak_to_ipa, ipa_to_espeak
    from gruut_ipa.sampa import ipa_to_sampa, sampa_to_ipa

    fixed_src_dest = {"ipa", "espeak", "sampa"}

    src_phonemes: typing.Optional[Phonemes] = None
    dest_phonemes: typing.Optional[Phonemes] = None

    if args.src not in fixed_src_dest:
        src_phonemes = Phonemes.from_language(args.src)

    if args.dest not in fixed_src_dest:
        dest_phoneme_map = Phonemes.from_language(args.dest).gruut_ipa_map

        # ipa -> original phoneme
        dest_phonemes = Phonemes()
        for k, v in dest_phoneme_map.items():
            if v in dest_phonemes.gruut_ipa_map:
                continue

            dest_phonemes.phonemes.append(Phoneme(text=k, is_ipa=False))
            dest_phonemes.ipa_map[v] = k

        dest_phonemes.update()

    for line in _input_lines(args.pronunciation, "pronunciations"):
        # Step 1: source representation -> IPA
        if args.src == "ipa":
            src_ipa = line
        elif args.src == "espeak":
            src_ipa = espeak_to_ipa(line)
        elif args.src == "sampa":
            src_ipa = sampa_to_ipa(line)
        else:
            assert src_phonemes is not None
            src_ipa = args.separator.join(
                src_phonemes.gruut_ipa_map.get(p.text, p.text)
                for p in src_phonemes.split(line)
            )

        # Step 2: IPA -> destination representation
        if args.dest == "ipa":
            dest_pron = src_ipa
        elif args.dest == "espeak":
            dest_pron = "[[" + ipa_to_espeak(src_ipa) + "]]"
        elif args.dest == "sampa":
            dest_pron = ipa_to_sampa(src_ipa)
        else:
            assert dest_phonemes is not None
            dest_pron = args.separator.join(
                p.text for p in dest_phonemes.split(src_ipa, is_ipa=False)
            )

        print(dest_pron)
        sys.stdout.flush()


# -----------------------------------------------------------------------------


def get_args(
    argv: typing.Optional[typing.Sequence[str]] = None,
) -> argparse.Namespace:
    """Parse command-line arguments.

    ``argv`` is the list of arguments to parse; when it is None (the default),
    argparse reads them from ``sys.argv``.  The returned namespace always has
    ``command`` (sub-command name), ``func`` (its handler) and ``debug``.
    """
    parser = argparse.ArgumentParser(prog="gruut_ipa")

    # Create subparsers for each sub-command
    sub_parsers = parser.add_subparsers()
    sub_parsers.required = True
    sub_parsers.dest = "command"

    # -----
    # print
    # -----
    print_parser = sub_parsers.add_parser("print", help="Print all known IPA phones")
    print_parser.add_argument(
        "--language", help="Only print phones from a specific language or language/set"
    )
    print_parser.set_defaults(func=do_print)

    # --------
    # describe
    # --------
    describe_parser = sub_parsers.add_parser("describe", help="Describe IPA phone(s)")
    describe_parser.set_defaults(func=do_describe)
    describe_parser.add_argument(
        "phone", nargs="*", help="IPA phones (read from stdin if not provided)"
    )

    # --------
    # phones
    # --------
    phones_parser = sub_parsers.add_parser(
        "phones", help="Group phones in IPA pronunciation"
    )
    phones_parser.set_defaults(func=do_phones)
    phones_parser.add_argument(
        "pronunciation",
        nargs="*",
        help="IPA pronunciations (read from stdin if not provided)",
    )
    phones_parser.add_argument(
        "--separator",
        default=" ",
        help="Separator to add between phones in output (default: space)",
    )

    # --------
    # phonemes
    # --------
    phonemes_parser = sub_parsers.add_parser(
        "phonemes",
        help="Group phones in IPA pronunciation according to language phonemes",
    )
    phonemes_parser.set_defaults(func=do_phonemes)
    phonemes_parser.add_argument("language", help="Language code (e.g., en-us)")
    phonemes_parser.add_argument(
        "pronunciation",
        nargs="*",
        help="IPA pronunciations (read from stdin if not provided)",
    )
    phonemes_parser.add_argument(
        "--separator",
        default=" ",
        help="Separator to add between phonemes in output (default: space)",
    )
    phonemes_parser.add_argument(
        "--keep-stress",
        action="store_true",
        help="Keep primary/secondary stress markers",
    )
    phonemes_parser.add_argument(
        "--drop-tones", action="store_true", help="Remove tone numbers/letters"
    )
    phonemes_parser.add_argument(
        "--phonemes-file", help="Load phonemes from file instead of using language code"
    )

    # -------
    # convert
    # -------
    convert_parser = sub_parsers.add_parser(
        "convert", help="Convert pronunciations between ipa, espeak, and sampa"
    )
    convert_parser.set_defaults(func=do_convert)
    convert_parser.add_argument(
        "src", help="Source format (language, language/set, ipa, espeak, sampa)"
    )
    convert_parser.add_argument(
        "dest", help="Destination format (language, language/set, ipa, espeak, sampa)"
    )
    convert_parser.add_argument(
        "pronunciation",
        nargs="*",
        help="Pronunciations (read from stdin if not provided)",
    )
    convert_parser.add_argument(
        "--separator", default=" ", help="Separator between phonemes (default: space)"
    )

    # Shared arguments
    for sub_parser in [
        print_parser,
        describe_parser,
        phones_parser,
        phonemes_parser,
        convert_parser,
    ]:
        sub_parser.add_argument(
            "--debug", action="store_true", help="Print DEBUG messages to console"
        )

    return parser.parse_args(argv)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()