ai-content-maker/.venv/Lib/site-packages/spacy_loggers/util.py

86 lines
2.7 KiB
Python
Raw Permalink Normal View History

2024-05-03 04:18:51 +03:00
"""
Configuration utilities copied from spacy.util.
"""
import sys
from typing import Dict, Any, Tuple, Callable, Iterator, List, Optional, IO
import re
from spacy import Language
from spacy.util import registry
# Type of a logger setup callable: it receives a Language object and two IO
# streams and returns a pair of callables. The first accepts an optional
# dict (Optional[Dict[str, Any]]) and returns None; the second takes no
# arguments and returns None.
# NOTE(review): presumably the two streams are stdout/stderr and the pair is
# (log_step, finalize), mirroring setup_default_console_logger — confirm.
LoggerT = Callable[
[Language, IO, IO],
Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]],
]
def walk_dict(
    node: Dict[str, Any], parent: Optional[List[str]] = None
) -> Iterator[Tuple[List[str], Any]]:
    """Walk a dict and yield the path and values of the leaves.

    Nested dicts are descended into recursively; every non-dict value is
    treated as a leaf. An empty nested dict therefore yields nothing.

    node (Dict[str, Any]): The (possibly nested) dict to traverse.
    parent (Optional[List[str]]): Keys leading to `node`, used as the prefix
        of every yielded path. Defaults to no prefix.
    YIELDS (Tuple[List[str], Any]): The list of keys leading to a leaf and
        the leaf value itself.
    """
    # A None sentinel replaces the former mutable `[]` default so that no
    # list object is shared between calls.
    prefix = [] if parent is None else parent
    for key, value in node.items():
        # Build a fresh list per entry so the caller's prefix is never mutated.
        key_parent = [*prefix, key]
        if isinstance(value, dict):
            yield from walk_dict(value, key_parent)
        else:
            yield (key_parent, value)
def dot_to_dict(values: Dict[str, Any]) -> Dict[str, dict]:
    """Expand dot-notation keys into a nested dict. For example:
    {"token.pos": True, "token._.xyz": True} becomes
    {"token": {"pos": True, "_": {"xyz": True }}}. Keys are lowercased
    before they are split on ".".

    values (Dict[str, Any]): The key/value pairs to convert.
    RETURNS (Dict[str, dict]): The converted values.
    """
    nested = {}
    for dotted_key, leaf_value in values.items():
        # Everything before the final segment names an intermediate dict.
        *parents, leaf = dotted_key.lower().split(".")
        cursor = nested
        for name in parents:
            cursor = cursor.setdefault(name, {})
        # setdefault keeps an already-present entry instead of overwriting it.
        cursor.setdefault(leaf, leaf_value)
    return nested
def dict_to_dot(obj: Dict[str, dict]) -> Dict[str, Any]:
    """Flatten a nested dict into dot notation. For example:
    {"token": {"pos": True, "_": {"xyz": True }}} becomes
    {"token.pos": True, "token._.xyz": True}.

    obj (Dict[str, dict]): The dict to convert.
    RETURNS (Dict[str, Any]): The key/value pairs.
    """
    flattened = {}
    for path, leaf in walk_dict(obj):
        # Each path is the list of keys leading to the leaf value.
        flattened[".".join(path)] = leaf
    return flattened
def matcher_for_regex_patterns(
    regexps: Optional[List[str]] = None,
) -> Callable[[str], bool]:
    """Build a predicate that tests a string against a list of regexes.

    Each pattern is compiled with re.MULTILINE. The returned function
    reports whether any compiled pattern is found in its argument by
    `search`; with no patterns it always returns False.

    regexps (Optional[List[str]]): The regular expressions to compile.
    RETURNS (Callable[[str], bool]): The matcher function.
    RAISES (ValueError): If a pattern cannot be compiled.
    """
    compiled = []
    for regex in () if regexps is None else regexps:
        try:
            compiled.append(re.compile(regex, flags=re.MULTILINE))
        except re.error as err:
            raise ValueError(
                f"Regular expression `{regex}` couldn't be compiled for logger stats matcher"
            ) from err

    def is_match(string: str) -> bool:
        return any(pattern.search(string) for pattern in compiled)

    return is_match
def setup_default_console_logger(
    nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
) -> Tuple[Callable, Callable]:
    """Create the console logger registered as "spacy.ConsoleLogger.v1".

    The logger is looked up in the "loggers" registry, configured with
    progress_bar=False and then set up with the given pipeline and streams.

    nlp (Language): The pipeline passed to the logger setup.
    stdout (IO): Stream passed to the logger setup as second argument.
    stderr (IO): Stream passed to the logger setup as third argument.
    RETURNS (Tuple[Callable, Callable]): The log-step and finalize callables
        returned by the console logger setup.
    """
    make_console_logger = registry.get("loggers", "spacy.ConsoleLogger.v1")
    setup_console = make_console_logger(progress_bar=False)
    # Unpacking checks that the setup returned exactly two callables.
    log_step, finalize = setup_console(nlp, stdout, stderr)
    return log_step, finalize