524 lines
20 KiB
Python
524 lines
20 KiB
Python
|
import collections
|
|||
|
import sys
|
|||
|
from collections.abc import Set
|
|||
|
from datetime import datetime, timedelta
|
|||
|
|
|||
|
import regex as re
|
|||
|
from tzlocal import get_localzone
|
|||
|
from dateutil.relativedelta import relativedelta
|
|||
|
|
|||
|
from dateparser.date_parser import date_parser
|
|||
|
from dateparser.freshness_date_parser import freshness_date_parser
|
|||
|
from dateparser.languages.loader import LocaleDataLoader
|
|||
|
from dateparser.conf import apply_settings, check_settings
|
|||
|
from dateparser.parser import _parse_absolute, _parse_nospaces
|
|||
|
from dateparser.timezone_parser import pop_tz_offset_from_string
|
|||
|
from dateparser.utils import apply_timezone_from_settings, \
|
|||
|
set_correct_day_from_settings, \
|
|||
|
get_timezone_from_tz_string
|
|||
|
from dateparser.custom_language_detection.language_mapping import map_languages
|
|||
|
|
|||
|
# Characters that visually resemble an ASCII apostrophe. They are joined into
# RE_SANITIZE_APOSTROPHE below so each of them can be replaced with "'".
APOSTROPHE_LOOK_ALIKE_CHARS = [
    '\N{RIGHT SINGLE QUOTATION MARK}',  # '\u2019'
    '\N{MODIFIER LETTER APOSTROPHE}',  # '\u02bc'
    '\N{MODIFIER LETTER TURNED COMMA}',  # '\u02bb'
    '\N{ARMENIAN APOSTROPHE}',  # '\u055a'
    '\N{LATIN SMALL LETTER SALTILLO}',  # '\ua78c'
    '\N{PRIME}',  # '\u2032'
    '\N{REVERSED PRIME}',  # '\u2035'
    '\N{MODIFIER LETTER PRIME}',  # '\u02b9'
    '\N{FULLWIDTH APOSTROPHE}',  # '\uff07'
]

# NOTE: ``re`` in this module is the third-party ``regex`` package
# (``import regex as re``), not the standard-library ``re``.

# A single NO-BREAK SPACE (U+00A0).
RE_NBSP = re.compile('\xa0', flags=re.UNICODE)
# One or more consecutive whitespace characters.
RE_SPACES = re.compile(r'\s+')
# A string that has whitespace at BOTH ends; group 1 is the text in between.
# A string with whitespace on only one side does not match.
RE_TRIM_SPACES = re.compile(r'^\s+(\S.*?)\s+$')
# Text starting at a non-space character, with any trailing colons left
# outside group 1.
RE_TRIM_COLONS = re.compile(r'(\S.*?):*$')

# Tokens replaced by a space in sanitize_date(): tab, newline, carriage return,
# U+00BB, a comma + whitespace + U+0432 at a word boundary, U+200E, U+00B7,
# U+200F, U+064E and U+064F.
RE_SANITIZE_SKIP = re.compile(r'\t|\n|\r|\u00bb|,\s\u0432\b|\u200e|\xb7|\u200f|\u064e|\u064f', flags=re.M)
# U+0433 followed by '.', when preceded by a non-word character or a digit
# (captured in group 1). Case-insensitive.
RE_SANITIZE_RUSSIAN = re.compile(r'([\W\d])\u0433\.', flags=re.I | re.U)
# Three dot-terminated numbers ("d. d. d.") optionally followed by " u";
# the three numbers are groups 1-3.
RE_SANITIZE_CROATIAN = re.compile(r'(\d+)\.\s?(\d+)\.\s?(\d+)\.( u)?', flags=re.I | re.U)
# A '.' whose preceding character is neither a digit nor whitespace.
RE_SANITIZE_PERIOD = re.compile(r'(?<=[^0-9\s])\.', flags=re.U)
# Shortest prefix ending in "on:" plus following whitespace; group 1 is the rest.
RE_SANITIZE_ON = re.compile(r'^.*?on:\s+(.*)')
# Any one of the apostrophe look-alike characters listed above.
RE_SANITIZE_APOSTROPHE = re.compile('|'.join(APOSTROPHE_LOOK_ALIKE_CHARS))

# Leading 10 digits (group 1), optionally followed by two further 3-digit
# groups (groups 2 and 3). The negative lookahead only allows end-of-string
# or a '.' right after the digits.
RE_SEARCH_TIMESTAMP = re.compile(r'^(\d{10})(\d{3})?(\d{3})?(?![^.])')
# Same as RE_SEARCH_TIMESTAMP but with a mandatory leading '-' inside group 1.
RE_SEARCH_NEGATIVE_TIMESTAMP = re.compile(r'^([-]\d{10})(\d{3})?(\d{3})?(?![^.])')
|
|||
|
|
|||
|
|
|||
|
def sanitize_spaces(date_string):
    """Normalize whitespace in ``date_string``.

    Non-breaking spaces become regular spaces, every run of whitespace is
    collapsed to a single space, and surrounding whitespace is removed when
    the string has whitespace on *both* ends (``RE_TRIM_SPACES`` does not
    match a string padded on one side only).

    :param date_string: The text to normalize.
    :type date_string: str
    :return: The normalized text.
    """
    without_nbsp = RE_NBSP.sub(' ', date_string)
    single_spaced = RE_SPACES.sub(' ', without_nbsp)
    return RE_TRIM_SPACES.sub(r'\1', single_spaced)
|
|||
|
|
|||
|
|
|||
|
def date_range(begin, end, **kwargs):
    """Yield dates from ``begin`` (inclusive) up to ``end`` (exclusive).

    :param begin: First value to yield.
    :param end: Upper bound; iteration stops once the running date is no
        longer smaller than it.
    :param kwargs: Keyword arguments forwarded to ``relativedelta`` to build
        the step. Without any, the step is one day.
    :raises ValueError: If a singular unit name (``year``, ``month``,
        ``week``, ``day``, ``hour``, ``minute`` or ``second``) is passed.
        As this is a generator, the error surfaces on first iteration.
    """
    singular_units = ('year', 'month', 'week', 'day', 'hour',
                      'minute', 'second')
    rejected = next((unit for unit in singular_units if unit in kwargs), None)
    if rejected is not None:
        raise ValueError("Invalid argument: %s" % rejected)

    if kwargs:
        step = relativedelta(**kwargs)
    else:
        step = relativedelta(days=1)

    current = begin
    while current < end:
        yield current
        current += step

    # handles edge-case when iterating months and last interval is < 30 days
    landed_in_end_month = (current.year, current.month) == (end.year, end.month)
    if kwargs.get('months', 0) > 0 and landed_in_end_month:
        yield end
|
|||
|
|
|||
|
|
|||
|
def get_intersecting_periods(low, high, period='day'):
    """Yield the start of every ``period`` that intersects ``[low, high)``.

    The first value is ``low`` truncated to the start of its period; each
    following value is one period later, for as long as it is below ``high``.

    :param low: Lower bound of the interval.
    :param high: Upper bound of the interval. Nothing is yielded when
        ``high <= low``.
    :param period: One of ``'year'``, ``'month'``, ``'week'``, ``'day'``,
        ``'hour'``, ``'minute'``, ``'second'`` or ``'microsecond'``.
    :raises ValueError: If ``period`` is not one of the values above.
        As this is a generator, the error surfaces on first iteration.
    """
    valid_periods = ('year', 'month', 'week', 'day',
                     'hour', 'minute', 'second', 'microsecond')
    if period not in valid_periods:
        raise ValueError("Invalid period: {}".format(period))

    if high <= low:
        return

    step = relativedelta(**{period + 's': 1})

    cursor = low
    if isinstance(cursor, datetime):
        # Zero every time field that is finer than ``period``. For day-sized
        # and larger periods that means all four fields.
        time_fields = ('microsecond', 'second', 'minute', 'hour')
        if period in time_fields:
            finer_fields = time_fields[:time_fields.index(period)]
        else:
            finer_fields = time_fields
        cursor = cursor.replace(**dict.fromkeys(finer_fields, 0))

    if period == 'week':
        # weekday() is 0 for Monday, so this rewinds to the Monday of the week.
        cursor = cursor - timedelta(days=cursor.weekday())
    elif period == 'month':
        cursor = cursor.replace(day=1)
    elif period == 'year':
        cursor = cursor.replace(month=1, day=1)

    while cursor < high:
        yield cursor
        cursor += step
|
|||
|
|
|||
|
|
|||
|
def sanitize_date(date_string):
    """Clean up ``date_string`` before it is handed to the parsers.

    The substitutions run in a fixed order: tokens to skip, the Russian year
    marker, Croatian dotted dates, whitespace normalization, stray periods,
    an ``on:`` prefix, trailing colons and apostrophe look-alikes. The result
    is finally stripped of surrounding whitespace.

    :param date_string: The raw text.
    :type date_string: str
    :return: The sanitized text.
    """
    before_space_cleanup = (
        (RE_SANITIZE_SKIP, ' '),
        # remove 'г.' (Russian for year) but not in words
        (RE_SANITIZE_RUSSIAN, r'\1 '),
        # extra '.' and 'u' interferes with parsing relative fractional dates
        (RE_SANITIZE_CROATIAN, r'\1.\2.\3 '),
    )
    after_space_cleanup = (
        (RE_SANITIZE_PERIOD, ''),
        (RE_SANITIZE_ON, r'\1'),
        (RE_TRIM_COLONS, r'\1'),
        (RE_SANITIZE_APOSTROPHE, "'"),
    )

    text = date_string
    for pattern, replacement in before_space_cleanup:
        text = pattern.sub(replacement, text)

    text = sanitize_spaces(text)

    for pattern, replacement in after_space_cleanup:
        text = pattern.sub(replacement, text)

    return text.strip()
|
|||
|
|
|||
|
|
|||
|
def get_date_from_timestamp(date_string, settings, negative=False):
    """Interpret ``date_string`` as a Unix timestamp, if it looks like one.

    :param date_string: Candidate text. It must start with 10 digits
        (seconds), optionally followed by 3 digits of milliseconds and 3
        digits of microseconds.
    :param settings: Settings object, or ``None``. Its ``TIMEZONE`` selects
        the timezone the timestamp is converted in.
    :param negative: When true, match timestamps with a leading ``-``.
    :return: A ``datetime`` as returned by ``apply_timezone_from_settings``,
        or ``None`` when ``date_string`` is not a timestamp.
    """
    pattern = RE_SEARCH_NEGATIVE_TIMESTAMP if negative else RE_SEARCH_TIMESTAMP
    match = pattern.search(date_string)
    if not match:
        return None

    wants_local_zone = (
        settings is None
        or settings.TIMEZONE is None
        or 'local' in settings.TIMEZONE.lower()
    )
    if wants_local_zone:
        # If the timezone in settings is unset, or it's 'local', use the
        # local timezone
        timezone = get_localzone()
    else:
        # Otherwise, use the timezone given in settings
        timezone = get_timezone_from_tz_string(settings.TIMEZONE)

    seconds = int(match.group(1))
    millis = int(match.group(2) or 0)
    micros = int(match.group(3) or 0)

    aware = datetime.fromtimestamp(seconds, timezone)
    # Attach the sub-second part and drop tzinfo before applying the settings.
    naive = aware.replace(microsecond=millis * 1000 + micros, tzinfo=None)
    return apply_timezone_from_settings(naive, settings)
|
|||
|
|
|||
|
|
|||
|
def parse_with_formats(date_string, date_formats, settings):
    """Parse ``date_string`` with the first ``strptime`` format that fits.

    :param date_string: The text to parse.
    :param date_formats: Iterable of ``strptime`` format strings, tried in
        order.
    :param settings: Settings object passed to
        ``set_correct_day_from_settings`` and ``apply_timezone_from_settings``.
    :returns: A ``DateData`` whose ``period`` is ``'month'`` when the
        matching format has no ``%d`` directive and ``'day'`` otherwise.
        When no format matches, ``date_obj`` is ``None`` and ``period`` is
        ``'day'``.
    """
    for date_format in date_formats:
        try:
            parsed = datetime.strptime(date_string, date_format)
        except ValueError:
            continue

        if '%d' in date_format:
            period = 'day'
        else:
            # No day in the format: let the settings decide which day to use.
            period = 'month'
            parsed = set_correct_day_from_settings(parsed, settings)

        if '%y' not in date_format and '%Y' not in date_format:
            # No year in the format: assume the current year.
            parsed = parsed.replace(year=datetime.today().year)

        parsed = apply_timezone_from_settings(parsed, settings)
        return DateData(date_obj=parsed, period=period)

    return DateData(date_obj=None, period='day')
|
|||
|
|
|||
|
|
|||
|
class _DateLocaleParser:
|
|||
|
|
|||
|
def __init__(self, locale, date_string, date_formats, settings=None):
|
|||
|
self._settings = settings
|
|||
|
if not (date_formats is None or isinstance(date_formats, (list, tuple, Set))):
|
|||
|
raise TypeError("Date formats should be list, tuple or set of strings")
|
|||
|
|
|||
|
self.locale = locale
|
|||
|
self.date_string = date_string
|
|||
|
self.date_formats = date_formats
|
|||
|
self._translated_date = None
|
|||
|
self._translated_date_with_formatting = None
|
|||
|
self._parsers = {
|
|||
|
'timestamp': self._try_timestamp,
|
|||
|
'negative-timestamp': self._try_negative_timestamp,
|
|||
|
'relative-time': self._try_freshness_parser,
|
|||
|
'custom-formats': self._try_given_formats,
|
|||
|
'absolute-time': self._try_absolute_parser,
|
|||
|
'no-spaces-time': self._try_nospaces_parser,
|
|||
|
}
|
|||
|
|
|||
|
@classmethod
|
|||
|
def parse(cls, locale, date_string, date_formats=None, settings=None):
|
|||
|
instance = cls(locale, date_string, date_formats, settings)
|
|||
|
return instance._parse()
|
|||
|
|
|||
|
def _parse(self):
|
|||
|
for parser_name in self._settings.PARSERS:
|
|||
|
date_data = self._parsers[parser_name]()
|
|||
|
if self._is_valid_date_data(date_data):
|
|||
|
return date_data
|
|||
|
else:
|
|||
|
return None
|
|||
|
|
|||
|
def _try_timestamp_parser(self, negative=False):
|
|||
|
return DateData(
|
|||
|
date_obj=get_date_from_timestamp(self.date_string, self._settings, negative=negative),
|
|||
|
period='time' if self._settings.RETURN_TIME_AS_PERIOD else 'day',
|
|||
|
)
|
|||
|
|
|||
|
def _try_timestamp(self):
|
|||
|
return self._try_timestamp_parser()
|
|||
|
|
|||
|
def _try_negative_timestamp(self):
|
|||
|
return self._try_timestamp_parser(negative=True)
|
|||
|
|
|||
|
def _try_freshness_parser(self):
|
|||
|
try:
|
|||
|
return freshness_date_parser.get_date_data(self._get_translated_date(), self._settings)
|
|||
|
except (OverflowError, ValueError):
|
|||
|
return None
|
|||
|
|
|||
|
def _try_absolute_parser(self):
|
|||
|
return self._try_parser(parse_method=_parse_absolute)
|
|||
|
|
|||
|
def _try_nospaces_parser(self):
|
|||
|
return self._try_parser(parse_method=_parse_nospaces)
|
|||
|
|
|||
|
def _try_parser(self, parse_method):
|
|||
|
_order = self._settings.DATE_ORDER
|
|||
|
try:
|
|||
|
if self._settings.PREFER_LOCALE_DATE_ORDER:
|
|||
|
if 'DATE_ORDER' not in self._settings._mod_settings:
|
|||
|
self._settings.DATE_ORDER = self.locale.info.get('date_order', _order)
|
|||
|
date_obj, period = date_parser.parse(
|
|||
|
self._get_translated_date(), parse_method=parse_method, settings=self._settings)
|
|||
|
self._settings.DATE_ORDER = _order
|
|||
|
return DateData(
|
|||
|
date_obj=date_obj,
|
|||
|
period=period,
|
|||
|
)
|
|||
|
except ValueError:
|
|||
|
self._settings.DATE_ORDER = _order
|
|||
|
return None
|
|||
|
|
|||
|
def _try_given_formats(self):
|
|||
|
if not self.date_formats:
|
|||
|
return
|
|||
|
|
|||
|
return parse_with_formats(
|
|||
|
self._get_translated_date_with_formatting(),
|
|||
|
self.date_formats, settings=self._settings
|
|||
|
)
|
|||
|
|
|||
|
def _get_translated_date(self):
|
|||
|
if self._translated_date is None:
|
|||
|
self._translated_date = self.locale.translate(
|
|||
|
self.date_string, keep_formatting=False, settings=self._settings)
|
|||
|
return self._translated_date
|
|||
|
|
|||
|
def _get_translated_date_with_formatting(self):
|
|||
|
if self._translated_date_with_formatting is None:
|
|||
|
self._translated_date_with_formatting = self.locale.translate(
|
|||
|
self.date_string, keep_formatting=True, settings=self._settings)
|
|||
|
return self._translated_date_with_formatting
|
|||
|
|
|||
|
def _is_valid_date_data(self, date_data):
|
|||
|
if not isinstance(date_data, DateData):
|
|||
|
return False
|
|||
|
if not date_data['date_obj'] or not date_data['period']:
|
|||
|
return False
|
|||
|
if date_data['date_obj'] and not isinstance(date_data['date_obj'], datetime):
|
|||
|
return False
|
|||
|
if date_data['period'] not in ('time', 'day', 'week', 'month', 'year'):
|
|||
|
return False
|
|||
|
return True
|
|||
|
|
|||
|
|
|||
|
class DateData:
    """
    Class that represents the parsed data with useful information.
    It can be accessed with square brackets like a dict object.
    """

    def __init__(self, *, date_obj=None, period=None, locale=None):
        """
        :param date_obj: The parsed value, or ``None``.
        :param period: Name of the granularity of ``date_obj``, or ``None``.
        :param locale: Identifier of the locale used for parsing, or ``None``.
        """
        self.date_obj = date_obj
        self.period = period
        self.locale = locale

    def __getitem__(self, k):
        """Return attribute ``k``.

        :raises KeyError: If the instance has no attribute named ``k``.
        """
        if not hasattr(self, k):
            raise KeyError(k)
        return getattr(self, k)

    def __setitem__(self, k, v):
        """Set the existing attribute ``k`` to ``v``.

        :raises KeyError: If the instance has no attribute named ``k``;
            new attributes cannot be created through item assignment.
        """
        if not hasattr(self, k):
            raise KeyError(k)
        setattr(self, k, v)

    def __repr__(self):
        # ``{!r}`` goes through repr(), which also works when a value is a
        # class object; calling ``val.__repr__()`` directly does not.
        properties_text = ', '.join(
            '{}={!r}'.format(prop, val) for prop, val in self.__dict__.items())

        return '{}({})'.format(
            self.__class__.__name__, properties_text
        )
|
|||
|
|
|||
|
|
|||
|
class DateDataParser:
    """
    Class which handles language detection, translation and subsequent generic parsing of
    string representing date and/or time.

    :param languages:
        A list of language codes, e.g. ['en', 'es', 'zh-Hant'].
        If locales are not given, languages and region are
        used to construct locales for translation.
    :type languages: list

    :param locales:
        A list of locale codes, e.g. ['fr-PF', 'qu-EC', 'af-NA'].
        The parser uses only these locales to translate date string.
    :type locales: list

    :param region:
        A region code, e.g. 'IN', '001', 'NE'.
        If locales are not given, languages and region are
        used to construct locales for translation.
    :type region: str

    :param try_previous_locales:
        If True, locales previously used to translate date are tried first.
    :type try_previous_locales: bool

    :param use_given_order:
        If True, locales are tried for translation of date string
        in the order in which they are given.
    :type use_given_order: bool

    :param settings:
        Configure customized behavior using settings defined in :mod:`dateparser.conf.Settings`.
    :type settings: dict

    :param detect_languages_function:
        A function for language detection that takes as input a `text` and a `confidence_threshold`,
        and returns a list of detected language codes.
        Note: this function is only used if ``languages`` and ``locales`` are not provided.
    :type detect_languages_function: function

    :return: A parser instance

    :raises:
        ``ValueError``: Unknown Language, ``TypeError``: Languages argument must be a list,
        ``SettingValidationError``: A provided setting is not valid.
    """

    # Lazily created by _get_locale_loader() and shared by all instances.
    locale_loader = None

    @apply_settings
    def __init__(self, languages=None, locales=None, region=None, try_previous_locales=False,
                 use_given_order=False, settings=None, detect_languages_function=None):

        if languages is not None and not isinstance(languages, (list, tuple, Set)):
            raise TypeError("languages argument must be a list (%r given)" % type(languages))

        if locales is not None and not isinstance(locales, (list, tuple, Set)):
            raise TypeError("locales argument must be a list (%r given)" % type(locales))

        if region is not None and not isinstance(region, str):
            raise TypeError("region argument must be str (%r given)" % type(region))

        if not isinstance(try_previous_locales, bool):
            raise TypeError("try_previous_locales argument must be a boolean (%r given)"
                            % type(try_previous_locales))

        if not isinstance(use_given_order, bool):
            raise TypeError("use_given_order argument must be a boolean (%r given)"
                            % type(use_given_order))

        if not locales and not languages and use_given_order:
            raise ValueError("locales or languages must be given if use_given_order is True")

        check_settings(settings)

        self._settings = settings
        self.try_previous_locales = try_previous_locales
        self.use_given_order = use_given_order
        self.languages = list(languages) if languages else None
        self.locales = locales
        self.region = region
        self.detect_languages_function = detect_languages_function
        # Used as an ordered set: keys are locales that parsed successfully.
        self.previous_locales = collections.OrderedDict()

    def get_date_data(self, date_string, date_formats=None):
        """
        Parse string representing date and/or time in recognizable localized formats.
        Supports parsing multiple languages and timezones.

        :param date_string:
            A string representing date and/or time in a recognizably valid format.
        :type date_string: str
        :param date_formats:
            A list of format strings using directives as given
            `here <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior>`_.
            The parser applies formats one by one, taking into account the detected languages.
        :type date_formats: list

        :return: a ``DateData`` object.

        :raises: ValueError - Unknown Language, TypeError - ``date_string`` is not a ``str``

        .. note:: *Period* values can be a 'day' (default), 'week', 'month', 'year', 'time'.

        *Period* represents the granularity of date parsed from the given string.

        In the example below, since no day information is present, the day is assumed to be current
        day ``16`` from *current date* (which is June 16, 2015, at the moment of writing this).
        Hence, the level of precision is ``month``:

            >>> DateDataParser().get_date_data('March 2015')
            DateData(date_obj=datetime.datetime(2015, 3, 16, 0, 0), period='month', locale='en')

        Similarly, for date strings with no day and month information present, level of precision
        is ``year`` and day ``16`` and month ``6`` are from *current_date*.

            >>> DateDataParser().get_date_data('2014')
            DateData(date_obj=datetime.datetime(2014, 6, 16, 0, 0), period='year', locale='en')

        Dates with time zone indications or UTC offsets are returned in UTC time unless
        specified using `Settings <https://dateparser.readthedocs.io/en/latest/settings.html#settings>`__.

            >>> DateDataParser().get_date_data('23 March 2000, 1:21 PM CET')
            DateData(date_obj=datetime.datetime(2000, 3, 23, 13, 21, tzinfo=<StaticTzInfo 'CET'>),
            period='day', locale='en')

        """
        if not isinstance(date_string, str):
            raise TypeError('Input type must be str')

        # First try the caller's formats on the raw, untranslated string.
        res = parse_with_formats(date_string, date_formats or [], self._settings)
        if res['date_obj']:
            return res

        date_string = sanitize_date(date_string)

        for locale in self._get_applicable_locales(date_string):
            parsed_date = _DateLocaleParser.parse(
                locale, date_string, date_formats, settings=self._settings)
            if parsed_date:
                parsed_date['locale'] = locale.shortname
                if self.try_previous_locales:
                    self.previous_locales[locale] = None
                return parsed_date

        return DateData(date_obj=None, period='day', locale=None)

    def get_date_tuple(self, *args, **kwargs):
        """Same as :meth:`get_date_data`, but return the result as a named tuple."""
        date_data = self.get_date_data(*args, **kwargs)
        fields = date_data.__dict__.keys()
        date_tuple = collections.namedtuple('DateData', fields)
        return date_tuple(**date_data.__dict__)

    def _get_applicable_locales(self, date_string):
        """Yield the locales worth trying for ``date_string``.

        Order: previously successful locales (when ``try_previous_locales``
        is set), then the locales built from languages/locales/region, then
        the locales of ``settings.DEFAULT_LANGUAGES``. The last group is
        yielded without an applicability check.
        """
        pop_tz_cache = []

        def date_strings():
            """ A generator instead of a static list to avoid calling
            pop_tz_offset_from_string if the first locale matches on unmodified
            date_string.
            """
            yield date_string
            if not pop_tz_cache:
                stripped_date_string, _ = pop_tz_offset_from_string(
                    date_string, as_offset=False)
                if stripped_date_string == date_string:
                    stripped_date_string = None
                pop_tz_cache[:] = [stripped_date_string]
            stripped_date_string, = pop_tz_cache
            if stripped_date_string is not None:
                yield stripped_date_string

        def applies(locale):
            # any() stops at the first matching variant, so a locale that fits
            # both the original and the timezone-stripped string is reported
            # once rather than twice.
            return any(self._is_applicable_locale(locale, s) for s in date_strings())

        if self.try_previous_locales:
            for locale in self.previous_locales.keys():
                if applies(locale):
                    yield locale

        if self.detect_languages_function and not self.languages and not self.locales:
            detected_languages = self.detect_languages_function(
                text=date_string, confidence_threshold=self._settings.LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD
            )

            # NOTE(review): the detected languages are stored on the instance,
            # so once this is non-empty the detection above is skipped for
            # later calls on the same parser - confirm this is intended.
            self.languages = map_languages(detected_languages)

        for locale in self._get_locale_loader().get_locales(
                languages=self.languages, locales=self.locales, region=self.region,
                use_given_order=self.use_given_order):
            if applies(locale):
                yield locale

        if self._settings.DEFAULT_LANGUAGES:
            for locale in self._get_locale_loader().get_locales(
                languages=self._settings.DEFAULT_LANGUAGES, locales=None,
                region=self.region, use_given_order=self.use_given_order
            ):
                yield locale

    def _is_applicable_locale(self, locale, date_string):
        """Ask ``locale`` whether it can handle ``date_string`` as given."""
        return locale.is_applicable(
            date_string,
            strip_timezone=False,  # it is stripped outside
            settings=self._settings)

    @classmethod
    def _get_locale_loader(cls):
        """Return the class-wide ``LocaleDataLoader``, creating it on first use."""
        if not cls.locale_loader:
            cls.locale_loader = LocaleDataLoader()
        return cls.locale_loader
|