import json import os import shutil from collections import OrderedDict import regex as re from dateparser_scripts.order_languages import _get_language_locale_dict from dateparser_scripts.utils import get_dict_difference, get_raw_data APOSTROPHE_LOOK_ALIKE_CHARS = [ '\N{RIGHT SINGLE QUOTATION MARK}', # '\u2019' '\N{MODIFIER LETTER APOSTROPHE}', # '\u02bc' '\N{MODIFIER LETTER TURNED COMMA}', # '\u02bb' '\N{ARMENIAN APOSTROPHE}', # '\u055a' '\N{LATIN SMALL LETTER SALTILLO}', # '\ua78c' '\N{PRIME}', # '\u2032' '\N{REVERSED PRIME}', # '\u2035' '\N{MODIFIER LETTER PRIME}', # '\u02b9' '\N{FULLWIDTH APOSTROPHE}', # '\uff07' ] DATE_ORDER_PATTERN = re.compile('([DMY])+\u200f*[-/. \t]*([DMY])+\u200f*[-/. \t]*([DMY])+') RELATIVE_PATTERN = re.compile(r'(?