30 lines
808 B
Python
30 lines
808 B
Python
|
from .. import normalizers
|
||
|
|
||
|
|
||
|
# Re-export names from the `normalizers` module so they can be reached
# directly from this module's namespace.
Normalizer = normalizers.Normalizer

# Unicode normalization forms (the four entries registered in NORMALIZERS).
NFC = normalizers.NFC
NFD = normalizers.NFD
NFKC = normalizers.NFKC
NFKD = normalizers.NFKD

# Remaining names exposed by `normalizers`.
BertNormalizer = normalizers.BertNormalizer
Sequence = normalizers.Sequence
Lowercase = normalizers.Lowercase
Prepend = normalizers.Prepend
Strip = normalizers.Strip
StripAccents = normalizers.StripAccents
Nmt = normalizers.Nmt
Precompiled = normalizers.Precompiled
Replace = normalizers.Replace
|
||
|
|
||
|
|
||
|
# Maps the string name of a unicode normalizer to the object that is called
# (with no arguments) to build it; see `unicode_normalizer_from_str`.
NORMALIZERS = {
    "nfc": NFC,
    "nfd": NFD,
    "nfkc": NFKC,
    "nfkd": NFKD,
}
|
||
|
|
||
|
|
||
|
def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
    """Build the unicode normalizer registered under the given name.

    Args:
        normalizer: A key of ``NORMALIZERS``. The lookup is an exact,
            case-sensitive dictionary match.

    Returns:
        The result of calling the ``NORMALIZERS`` entry with no arguments.

    Raises:
        ValueError: If ``normalizer`` is not a key of ``NORMALIZERS``.
    """
    if normalizer in NORMALIZERS:
        return NORMALIZERS[normalizer]()

    # list(...) keeps the message readable: formatting the keys view directly
    # would render as "dict_keys([...])" instead of a plain list of names.
    raise ValueError(
        "{} is not a known unicode normalizer. Available are {}".format(normalizer, list(NORMALIZERS))
    )
|