ai-content-maker/.venv/Lib/site-packages/gruut/__init__.py

105 lines
2.9 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
"""gruut module"""
import itertools
import logging
import re
import sqlite3
import threading
import typing
from enum import Enum
from pathlib import Path
from gruut.const import KNOWN_LANGS, TextProcessorSettings
from gruut.resources import _DIR, _PACKAGE
from gruut.text_processor import Sentence, TextProcessor
from gruut.utils import resolve_lang
# -----------------------------------------------------------------------------
# Logger whose name is _PACKAGE (imported from gruut.resources)
_LOGGER = logging.getLogger(_PACKAGE)
# Version string is read at import time from the VERSION file under _DIR,
# with surrounding whitespace stripped
__version__ = (_DIR / "VERSION").read_text(encoding="utf-8").strip()
__author__ = "Michael Hansen (synesthesiam)"
# Names exported by "from gruut import *"
__all__ = [
    "sentences",
    "is_language_supported",
    "get_supported_languages",
    "TextProcessor",
    "TextProcessorSettings",
]
# -----------------------------------------------------------------------------
# Thread-local storage; sentences() keeps a per-thread "processors" dict on it
_LOCAL = threading.local()
# Re-entrant lock acquired by sentences() when it looks up or creates a processor
_PROCESSORS_LOCK = threading.RLock()
def sentences(
    text: str,
    lang: str = "en_US",
    ssml: bool = False,
    espeak: bool = False,
    major_breaks: bool = True,
    minor_breaks: bool = True,
    punctuations: bool = True,
    explicit_lang: bool = True,
    phonemes: bool = True,
    break_phonemes: bool = True,
    pos: bool = True,
    **process_args,
) -> typing.Iterable[Sentence]:
    """
    Process text and return sentences

    This is a generator function: no work is done until the result is
    iterated. A TextProcessor is created on first use and cached in
    thread-local storage, keyed by model prefix ("" or "espeak"), so each
    thread reuses its own processors across calls. The cached processor keeps
    the ``default_lang`` of the call that created it; ``lang`` is still
    passed explicitly on every call.

    Args:
        text: input text or SSML (ssml=True)
        lang: default language of input text
        ssml: True if input text is SSML
        espeak: True if eSpeak phonemes should be used
        major_breaks: False if no sentence-breaking symbols in output
        minor_breaks: False if no phrase-breaking symbols in output
        punctuations: False if no word-surrounding symbols in output
        explicit_lang: forwarded unchanged to TextProcessor.sentences
        phonemes: forwarded unchanged to TextProcessor.sentences
        break_phonemes: forwarded unchanged to TextProcessor.sentences
        pos: forwarded unchanged to TextProcessor.sentences
        **process_args: keyword arguments passed to TextProcessor.process

    Returns:
        sentences: iterable of Sentence objects
    """
    model_prefix = "espeak" if espeak else ""

    # NOTE(review): the lock scope below was reconstructed conservatively
    # (lookup, processing and iteration all under the lock) -- confirm that
    # this matches the intended behaviour.
    with _PROCESSORS_LOCK:
        # Lazily create this thread's processor cache
        if not hasattr(_LOCAL, "processors"):
            _LOCAL.processors = {}

        text_processor = _LOCAL.processors.get(model_prefix)
        if text_processor is None:
            # First request for this model prefix in this thread
            text_processor = TextProcessor(default_lang=lang, model_prefix=model_prefix)
            _LOCAL.processors[model_prefix] = text_processor

        # Cannot fail at this point; presumably kept for static type checkers
        assert text_processor is not None

        # Calling the processor yields the graph and root consumed below
        graph, root = text_processor(text, lang=lang, ssml=ssml, **process_args)

        yield from text_processor.sentences(
            graph,
            root,
            major_breaks=major_breaks,
            minor_breaks=minor_breaks,
            punctuations=punctuations,
            explicit_lang=explicit_lang,
            phonemes=phonemes,
            break_phonemes=break_phonemes,
            pos=pos,
        )
# -----------------------------------------------------------------------------
def is_language_supported(lang: str) -> bool:
    """
    Report whether gruut supports the given language.

    The name is first passed through ``resolve_lang`` and the result is then
    looked up in ``KNOWN_LANGS``.
    """
    resolved = resolve_lang(lang)
    return resolved in KNOWN_LANGS
def get_supported_languages() -> typing.Set[str]:
    """
    Return the languages gruut supports.

    A new set is built from ``KNOWN_LANGS`` on every call, so callers may
    modify the result freely.
    """
    return {*KNOWN_LANGS}