52 lines
1.3 KiB
Python
52 lines
1.3 KiB
Python
"""Unicode to ASCII transliteration"""
|
|
|
|
from sys import intern
|
|
from zlib import MAX_WBITS, decompress
|
|
|
|
# Pick a ``read_binary(package, resource)`` implementation, preferring the
# newest resource-loading API that this interpreter can import.
try:
    from importlib.resources import files
except ImportError:
    try:
        # Next choice: the read_binary helper shipped by importlib.resources.
        from importlib.resources import read_binary
    except ImportError:
        # Last resort when neither importlib.resources name can be imported.
        from pkgutil import get_data as read_binary
else:

    def read_binary(package, resource):
        """Return the contents of *resource* inside *package* as bytes."""
        resource_path = files(package).joinpath(resource)
        return resource_path.read_bytes()


__version__ = "0.3.2"

# Cache of decoded transliteration tables, keyed by block number
# (code point >> 8). It is filled lazily by anyascii(); a block whose data
# file is missing is cached as an empty tuple.
_blocks = {}
|
|
|
|
|
|
def anyascii(string):
    # type: (str) -> str
    """Return an ASCII-only transliteration of *string*.

    ASCII characters are passed through unchanged. Every other character is
    looked up in a table covering its 256-code-point block; the table is
    read from the ``anyascii._data`` package on first use and then kept in
    the module-level ``_blocks`` cache. A character whose block has no data
    file, or whose offset lies beyond the end of its table, contributes
    nothing to the result.
    """
    # Fast path: objects without an isascii() method simply fall through to
    # the per-character loop below.
    try:
        already_ascii = string.isascii()
    except AttributeError:
        already_ascii = False
    if already_ascii:
        return string

    pieces = []
    for ch in string:
        cp = ord(ch)
        if cp < 0x80:
            pieces.append(ch)
            continue

        # High bits select the block, the low byte indexes into its table.
        block_id, offset = divmod(cp, 0x100)

        table = _blocks.get(block_id)
        if table is None:
            # First time this block is needed: the data file is a raw
            # deflate stream holding tab-separated ASCII replacements.
            try:
                raw = read_binary("anyascii._data", "%03x" % block_id)
                text = decompress(raw, -MAX_WBITS).decode("ascii")
            except FileNotFoundError:
                table = ()
            else:
                table = tuple(intern(entry) for entry in text.split("\t"))
            _blocks[block_id] = table

        if offset < len(table):
            pieces.append(table[offset])

    return "".join(pieces)
|