86 lines
2.7 KiB
Python
86 lines
2.7 KiB
Python
"""
|
|
Configuration utilities copied from spacy.util.
|
|
"""
|
|
import sys
|
|
from typing import Dict, Any, Tuple, Callable, Iterator, List, Optional, IO
|
|
import re
|
|
|
|
from spacy import Language
|
|
from spacy.util import registry
|
|
|
|
|
|
LoggerT = Callable[
|
|
[Language, IO, IO],
|
|
Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]],
|
|
]
|
|
|
|
|
|
def walk_dict(
|
|
node: Dict[str, Any], parent: List[str] = []
|
|
) -> Iterator[Tuple[List[str], Any]]:
|
|
"""Walk a dict and yield the path and values of the leaves."""
|
|
for key, value in node.items():
|
|
key_parent = [*parent, key]
|
|
if isinstance(value, dict):
|
|
yield from walk_dict(value, key_parent)
|
|
else:
|
|
yield (key_parent, value)
|
|
|
|
|
|
def dot_to_dict(values: Dict[str, Any]) -> Dict[str, dict]:
|
|
"""Convert dot notation to a dict. For example: {"token.pos": True,
|
|
"token._.xyz": True} becomes {"token": {"pos": True, "_": {"xyz": True }}}.
|
|
|
|
values (Dict[str, Any]): The key/value pairs to convert.
|
|
RETURNS (Dict[str, dict]): The converted values.
|
|
"""
|
|
result = {}
|
|
for key, value in values.items():
|
|
path = result
|
|
parts = key.lower().split(".")
|
|
for i, item in enumerate(parts):
|
|
is_last = i == len(parts) - 1
|
|
path = path.setdefault(item, value if is_last else {})
|
|
return result
|
|
|
|
|
|
def dict_to_dot(obj: Dict[str, dict]) -> Dict[str, Any]:
|
|
"""Convert dot notation to a dict. For example: {"token": {"pos": True,
|
|
"_": {"xyz": True }}} becomes {"token.pos": True, "token._.xyz": True}.
|
|
|
|
values (Dict[str, dict]): The dict to convert.
|
|
RETURNS (Dict[str, Any]): The key/value pairs.
|
|
"""
|
|
return {".".join(key): value for key, value in walk_dict(obj)}
|
|
|
|
|
|
def matcher_for_regex_patterns(
|
|
regexps: Optional[List[str]] = None,
|
|
) -> Callable[[str], bool]:
|
|
try:
|
|
compiled = []
|
|
if regexps is not None:
|
|
for regex in regexps:
|
|
compiled.append(re.compile(regex, flags=re.MULTILINE))
|
|
except re.error as err:
|
|
raise ValueError(
|
|
f"Regular expression `{regex}` couldn't be compiled for logger stats matcher"
|
|
) from err
|
|
|
|
def is_match(string: str) -> bool:
|
|
for regex in compiled:
|
|
if regex.search(string):
|
|
return True
|
|
return False
|
|
|
|
return is_match
|
|
|
|
|
|
def setup_default_console_logger(
|
|
nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
|
|
) -> Tuple[Callable, Callable]:
|
|
console_logger = registry.get("loggers", "spacy.ConsoleLogger.v1")
|
|
console = console_logger(progress_bar=False)
|
|
console_log_step, console_finalize = console(nlp, stdout, stderr)
|
|
return console_log_step, console_finalize
|