111 lines
4.3 KiB
Python
111 lines
4.3 KiB
Python
import json
|
|
import os
|
|
import shutil
|
|
from collections import OrderedDict
|
|
|
|
import regex as re
|
|
from ruamel.yaml import RoundTripLoader
|
|
|
|
from dateparser_scripts.order_languages import avoid_languages
|
|
from dateparser_scripts.utils import combine_dicts
|
|
|
|
cldr_date_directory = '../dateparser_data/cldr_language_data/date_translation_data/'
|
|
supplementary_directory = '../dateparser_data/supplementary_language_data/'
|
|
supplementary_date_directory = '../dateparser_data/supplementary_language_data/date_translation_data/'
|
|
translation_data_directory = '../dateparser/data/'
|
|
date_translation_directory = '../dateparser/data/date_translation_data/'
|
|
|
|
os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
cldr_languages = list(set(map(lambda x: x[:-5], os.listdir(cldr_date_directory))) - avoid_languages)
|
|
supplementary_languages = [x[:-5] for x in os.listdir(supplementary_date_directory)]
|
|
all_languages = set(cldr_languages).union(set(supplementary_languages))
|
|
|
|
RELATIVE_PATTERN = re.compile(r'\{0\}')
|
|
|
|
|
|
def _modify_relative_data(relative_data):
|
|
modified_relative_data = OrderedDict()
|
|
for key, value in relative_data.items():
|
|
for i, string in enumerate(value):
|
|
string = RELATIVE_PATTERN.sub(r'(\\d+[.,]?\\d*)', string)
|
|
value[i] = string
|
|
modified_relative_data[key] = value
|
|
return modified_relative_data
|
|
|
|
|
|
def _modify_data(language_data):
|
|
relative_data = language_data.get("relative-type-regex", {})
|
|
relative_data = _modify_relative_data(relative_data)
|
|
locale_specific_data = language_data.get("locale_specific", {})
|
|
for _, info in locale_specific_data.items():
|
|
locale_relative_data = info.get("relative-type-regex", {})
|
|
locale_relative_data = _modify_relative_data(locale_relative_data)
|
|
|
|
|
|
def _get_complete_date_translation_data(language):
|
|
cldr_data = {}
|
|
supplementary_data = {}
|
|
if language in cldr_languages:
|
|
with open(cldr_date_directory + language + '.json') as f:
|
|
cldr_data = json.load(f, object_pairs_hook=OrderedDict)
|
|
if language in supplementary_languages:
|
|
with open(supplementary_date_directory + language + '.yaml') as g:
|
|
supplementary_data = OrderedDict(RoundTripLoader(g).get_data())
|
|
complete_data = combine_dicts(cldr_data, supplementary_data)
|
|
if 'name' not in complete_data:
|
|
complete_data['name'] = language
|
|
return complete_data
|
|
|
|
|
|
def _write_file(filename, text, mode, in_memory, in_memory_result):
|
|
if in_memory:
|
|
in_memory_result[filename] = text
|
|
else:
|
|
with open(filename, mode) as out:
|
|
out.write(text)
|
|
|
|
|
|
def write_complete_data(in_memory=False):
|
|
"""
|
|
This function is responsible of generating the needed py files from the
|
|
CLDR files (JSON format) and supplementary language data (YAML format).
|
|
|
|
Use it with in_memory=True to avoid writing real files and getting a
|
|
dictionary containing the file names and their content (used when testing).
|
|
"""
|
|
in_memory_result = {}
|
|
|
|
if not in_memory:
|
|
if not os.path.isdir(translation_data_directory):
|
|
os.mkdir(translation_data_directory)
|
|
if os.path.isdir(date_translation_directory):
|
|
shutil.rmtree(date_translation_directory)
|
|
os.mkdir(date_translation_directory)
|
|
|
|
with open(supplementary_directory + 'base_data.yaml') as f:
|
|
base_data = RoundTripLoader(f).get_data()
|
|
|
|
for language in all_languages:
|
|
date_translation_data = _get_complete_date_translation_data(language)
|
|
date_translation_data = combine_dicts(date_translation_data, base_data)
|
|
_modify_data(date_translation_data)
|
|
translation_data = json.dumps(date_translation_data, indent=4, separators=(',', ': '),
|
|
ensure_ascii=False)
|
|
out_text = ('info = ' + translation_data + '\n').encode('utf-8')
|
|
_write_file(date_translation_directory + language + '.py', out_text, 'wb', in_memory, in_memory_result)
|
|
|
|
init_text = (
|
|
"from dateparser.data import date_translation_data\n"
|
|
"from .languages_info import language_order, language_locale_dict\n"
|
|
)
|
|
|
|
_write_file(translation_data_directory + '__init__.py', init_text, 'w', False, in_memory_result)
|
|
_write_file(date_translation_directory + '__init__.py', '', 'w', False, in_memory_result)
|
|
|
|
return in_memory_result
|
|
|
|
|
|
if __name__ == '__main__':
|
|
write_complete_data()
|