12 lines
267 B
Python
12 lines
267 B
Python
|
from ..char_classes import LIST_QUOTES
|
||
|
from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
|
||
|
|
||
|
_infixes = (
|
||
|
["·", "ㆍ", r"\(", r"\)"]
|
||
|
+ [r"(?<=[0-9])~(?=[0-9-])"]
|
||
|
+ LIST_QUOTES
|
||
|
+ BASE_TOKENIZER_INFIXES
|
||
|
)
|
||
|
|
||
|
TOKENIZER_INFIXES = _infixes
|