30 lines
808 B
Python
30 lines
808 B
Python
from .. import normalizers
|
|
|
|
|
|
# Module-level aliases for the classes exposed by the `normalizers` module
# imported from the parent package, so they can be referenced here by their
# bare names.
Normalizer = normalizers.Normalizer
BertNormalizer = normalizers.BertNormalizer

# Aliases used as the values of the NORMALIZERS table below.
NFD = normalizers.NFD
NFKD = normalizers.NFKD
NFC = normalizers.NFC
NFKC = normalizers.NFKC

# Remaining aliases.
Sequence = normalizers.Sequence
Lowercase = normalizers.Lowercase
Prepend = normalizers.Prepend
Strip = normalizers.Strip
StripAccents = normalizers.StripAccents
Nmt = normalizers.Nmt
Precompiled = normalizers.Precompiled
Replace = normalizers.Replace


# Lookup table from a lowercase string key to the matching normalizer class.
# The insertion order is visible to callers: the keys are rendered in the
# error message of `unicode_normalizer_from_str`.
NORMALIZERS = {
    "nfc": NFC,
    "nfd": NFD,
    "nfkc": NFKC,
    "nfkd": NFKD,
}
|
|
|
|
|
|
def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
    """Instantiate the unicode normalizer registered under ``normalizer``.

    Args:
        normalizer: Key to look up in ``NORMALIZERS`` (for example ``"nfc"``).

    Returns:
        A new object obtained by calling the matching ``NORMALIZERS`` entry
        with no arguments.

    Raises:
        ValueError: If ``normalizer`` is not a key of ``NORMALIZERS``.
    """
    # Success path first: known keys are instantiated and returned directly.
    if normalizer in NORMALIZERS:
        return NORMALIZERS[normalizer]()

    # Unknown key: report it together with the keys that are available.
    message = "{} is not a known unicode normalizer. Available are {}".format(
        normalizer, NORMALIZERS.keys()
    )
    raise ValueError(message)
|