ai-content-maker/.venv/Lib/site-packages/dateparser/search/text_detection.py

67 lines
2.9 KiB
Python

from dateparser.search.detection import BaseLanguageDetector
from dateparser.conf import apply_settings
from dateparser.utils import normalize_unicode
class FullTextLanguageDetector(BaseLanguageDetector):
    """Choose one language for a piece of text from a list of candidates.

    Detection runs in two stages:

    1. ``character_check`` narrows ``self.languages`` by looking only at
       the characters that occur in the text.
    2. ``_best_language`` asks each remaining language how applicable it
       is to the text and returns the ``shortname`` of the best one.

    Both stages mutate ``self.languages``; the list never grows back once
    it has been narrowed.
    """

    def __init__(self, languages):
        """Store a shallow copy of *languages* and empty character caches.

        :param languages: sliceable collection of language objects. Each
            one is expected to provide ``get_wordchars_for_detection``,
            ``count_applicability`` and ``shortname``, which the other
            methods of this class use.
        """
        # NOTE(review): ``super(BaseLanguageDetector, self)`` starts the
        # MRO lookup *after* BaseLanguageDetector, so the base class's own
        # ``__init__`` is not invoked here. Left unchanged because the base
        # signature is not visible from this file -- confirm before
        # switching to a plain ``super().__init__(...)``.
        super(BaseLanguageDetector, self).__init__()
        self.languages = languages[:]
        # Per-language caches, index-aligned with ``self.languages`` after
        # ``get_unique_characters`` has run.
        self.language_unique_chars = []
        self.language_chars = []

    def get_unique_characters(self, settings):
        """Recompute the per-language character sets used for detection.

        Fills ``self.language_chars`` with what each language returns from
        ``get_wordchars_for_detection`` and ``self.language_unique_chars``
        with the characters left once every *different* character set has
        been subtracted from it.

        Both lists are rebuilt from scratch on every call, so they always
        line up index-for-index with the current ``self.languages`` even
        when the method is invoked more than once on the same instance.

        :param settings: settings object; a copy with ``NORMALIZE=False``
            is the one actually passed to the languages.
        """
        settings = settings.replace(NORMALIZE=False)

        self.language_chars = [
            language.get_wordchars_for_detection(settings=settings)
            for language in self.languages
        ]

        self.language_unique_chars = []
        for char_set in self.language_chars:
            unique_chars = char_set
            for other_char_set in self.language_chars:
                # Comparison is by value: this skips the set itself and
                # also any other language whose character set is equal.
                if other_char_set != char_set:
                    unique_chars = unique_chars - other_char_set
            self.language_unique_chars.append(unique_chars)

    def character_check(self, date_string, settings):
        """Narrow ``self.languages`` using the characters in *date_string*.

        The checks run in this order, stopping at the first that applies:

        * The text consists only of digits and the punctuation listed in
          ``symbol_set``: keep just the first candidate language.
        * The text contains a character unique to one language: keep just
          that language (the first match in candidate order wins).
        * Otherwise: drop every language that shares no character at all
          with the text.

        :param date_string: text to inspect.
        :param settings: settings forwarded to ``get_unique_characters``.
        :returns: ``None``; the result is the updated ``self.languages``.
        """
        lowered = date_string.lower()
        date_string_set = set(lowered)
        symbol_set = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                      " ", "/", "-", ")", "(", ".", ":", "\\", ",", "'"}

        if date_string_set <= symbol_set:
            # Nothing language-specific to look at. Slicing instead of
            # indexing keeps an empty candidate list empty rather than
            # raising IndexError.
            self.languages = list(self.languages[:1])
            return

        self.get_unique_characters(settings=settings)

        for language, unique_chars in zip(self.languages,
                                          self.language_unique_chars):
            # Substring test (not set membership) because ``char.lower()``
            # may be longer than one code point.
            if any(char.lower() in lowered for char in unique_chars):
                self.languages = [language]
                return

        self.languages = [
            language
            for language, chars in zip(self.languages, self.language_chars)
            if date_string_set & chars
        ]

    @apply_settings
    def _best_language(self, date_string, settings=None):
        """Return the ``shortname`` of the best language for *date_string*.

        After ``character_check`` has narrowed the candidates, a single
        survivor is returned immediately. Otherwise every remaining
        language is scored with ``count_applicability`` -- first with
        ``strip_timezone=False`` and, only if both leading counts are
        zero, again with ``strip_timezone=True``. Languages whose counts
        stay at zero are ignored.

        :param date_string: text whose language should be detected.
        :param settings: settings object; presumably supplied by the
            ``apply_settings`` decorator when omitted -- verify against
            ``dateparser.conf``.
        :returns: the winning language's ``shortname``, or ``None`` when
            no candidate applies. Ties go to the earliest candidate.
        """
        self.character_check(date_string, settings)
        date_string = normalize_unicode(date_string.lower())

        if len(self.languages) == 1:
            return self.languages[0].shortname

        applicable_languages = []
        for language in self.languages:
            for strip_timezone in (False, True):
                num_words = language.count_applicability(
                    date_string, strip_timezone=strip_timezone,
                    settings=settings)
                if num_words[0] > 0 or num_words[1] > 0:
                    applicable_languages.append(
                        (language.shortname, num_words))
                    break

        if not applicable_languages:
            return None
        # Rank by the first count, then by the second.
        return max(applicable_languages,
                   key=lambda p: (p[1][0], p[1][1]))[0]