import hashlib
from datetime import datetime
from functools import wraps

from dateparser.data.languages_info import language_order

from .parser import date_order_chart
from .utils import registry


@registry
class Settings:
    """Control and configure default parsing behavior of dateparser.
    Currently, supported settings are:

    * `DATE_ORDER`
    * `PREFER_LOCALE_DATE_ORDER`
    * `TIMEZONE`
    * `TO_TIMEZONE`
    * `RETURN_AS_TIMEZONE_AWARE`
    * `PREFER_DAY_OF_MONTH`
    * `PREFER_DATES_FROM`
    * `RELATIVE_BASE`
    * `STRICT_PARSING`
    * `REQUIRE_PARTS`
    * `SKIP_TOKENS`
    * `NORMALIZE`
    * `RETURN_TIME_AS_PERIOD`
    * `PARSERS`
    * `DEFAULT_LANGUAGES`
    * `LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD`
    * `CACHE_SIZE_LIMIT`
    """

    # True for instances built from the defaults; `replace` sets it to False.
    _default = True
    # Class-level cache of the defaults loaded by `_get_settings_from_pyfile`.
    _pyfile_data = None
    # The user-supplied overrides; read by `check_settings`. Never mutated in
    # place here, `replace` stores a new mapping on the new instance.
    _mod_settings = dict()

    def __init__(self, settings=None):
        """Set one attribute per entry of *settings*.

        :param settings: mapping of setting name to value. When it is falsy
            (``None`` or empty) the defaults returned by
            `_get_settings_from_pyfile` are used instead.
        """
        if settings:
            self._updateall(settings.items())
        else:
            self._updateall(self._get_settings_from_pyfile().items())

    @classmethod
    def get_key(cls, settings=None):
        """Return a string key identifying the *settings* mapping.

        :param settings: mapping of setting name to value, or a falsy value.
        :return: ``'default'`` when *settings* is falsy, otherwise the MD5 hex
            digest of the sorted ``name-value`` pairs.
        """
        # NOTE(review): presumably consumed by the `registry` decorator as an
        # instance lookup key -- confirm in `.utils`.
        if not settings:
            return 'default'

        # Sorting makes the key independent of the mapping's iteration order.
        keys = sorted(['%s-%s' % (key, str(settings[key])) for key in settings])
        return hashlib.md5(''.join(keys).encode('utf-8')).hexdigest()

    @classmethod
    def _get_settings_from_pyfile(cls):
        """Return the default settings mapping, loading it on first use."""
        if not cls._pyfile_data:
            # Imported lazily, on first call, rather than at module import.
            from dateparser_data import settings
            cls._pyfile_data = settings.settings
        return cls._pyfile_data

    def _updateall(self, iterable):
        """Set an attribute on this instance for every ``(key, value)`` pair."""
        for key, value in iterable:
            setattr(self, key, value)

    def replace(self, mod_settings=None, **kwds):
        """Return a new settings object with the given values overridden.

        :param mod_settings: the overrides as originally supplied by the
            caller; stored on the result as ``_mod_settings`` when truthy.
        :param kwds: setting names and their new values.
        :return: a new instance of this class. Settings not named in *kwds*
            keep the values of the current instance.
        :raises TypeError: if any value in *kwds* is ``None``.
        """
        for k, v in kwds.items():
            if v is None:
                raise TypeError('Invalid {{"{}": {}}}'.format(k, v))

        # Fill in every known setting that was not explicitly overridden.
        for x in self._get_settings_from_pyfile():
            kwds.setdefault(x, getattr(self, x))

        kwds['_default'] = False
        if mod_settings:
            kwds['_mod_settings'] = mod_settings

        return self.__class__(settings=kwds)


settings = Settings()


def apply_settings(f):
    """Decorate *f* so that it always receives a `Settings` instance.

    The wrapper reads the ``settings`` keyword argument:

    * falsy (missing, ``None``, empty dict) -> the module-level `settings`;
    * a ``dict`` -> the module-level `settings` with those values replaced;
    * a `Settings` instance -> passed through unchanged.

    :raises TypeError: if ``settings`` is any other truthy value.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        mod_settings = kwargs.get('settings')
        kwargs['settings'] = mod_settings or settings

        if isinstance(kwargs['settings'], dict):
            kwargs['settings'] = settings.replace(
                mod_settings=mod_settings, **kwargs['settings']
            )

        if not isinstance(kwargs['settings'], Settings):
            raise TypeError("settings can only be either dict or instance of Settings class")

        return f(*args, **kwargs)
    return wrapper


class SettingValidationError(ValueError):
    """Raised by the checks in this module when a setting is invalid."""


def _join_values(values, to_text=str):
    """Return *values* rendered with *to_text*, sorted and comma-separated.

    Converting each value before joining keeps ``str.join`` from raising
    ``TypeError`` on non-string entries, and sorting makes the resulting
    error message independent of set iteration order.
    """
    return ', '.join(sorted(map(to_text, values)))


def _check_repeated_values(setting_name, setting_value):
    """Raise `SettingValidationError` if *setting_value* has duplicates."""
    if len(setting_value) != len(set(setting_value)):
        raise SettingValidationError(
            'There are repeated values in the "{}" setting'.format(setting_name)
        )


def _check_require_part(setting_name, setting_value):
    """Validate a list of required date parts.

    :raises SettingValidationError: if the list contains anything other than
        ``'day'``, ``'month'`` or ``'year'``, or contains repeated values.
    """
    invalid_values = set(setting_value) - {'day', 'month', 'year'}
    if invalid_values:
        raise SettingValidationError(
            '"{}" setting contains invalid values: {}'.format(
                setting_name, _join_values(invalid_values)
            )
        )
    _check_repeated_values(setting_name, setting_value)


def _check_parsers(setting_name, setting_value):
    """Validate a list of parser names.

    :raises SettingValidationError: if the list contains an unknown parser
        name or contains repeated values.
    """
    existing_parsers = [
        'timestamp', 'relative-time', 'custom-formats', 'absolute-time',
        'no-spaces-time', 'negative-timestamp'
    ]  # FIXME: Extract the list of existing parsers from another place (#798)
    unknown_parsers = set(setting_value) - set(existing_parsers)
    if unknown_parsers:
        raise SettingValidationError(
            'Found unknown parsers in the "{}" setting: {}'.format(
                setting_name, _join_values(unknown_parsers)
            )
        )
    _check_repeated_values(setting_name, setting_value)


def _check_default_languages(setting_name, setting_value):
    """Validate a list of language codes against `language_order`.

    :raises SettingValidationError: if the list contains a value that is not
        in `language_order`, or contains repeated values.
    """
    unsupported_languages = set(setting_value) - set(language_order)
    if unsupported_languages:
        raise SettingValidationError(
            "Found invalid languages in the '{}' setting: {}".format(
                setting_name, _join_values(unsupported_languages, repr)
            )
        )
    _check_repeated_values(setting_name, setting_value)


def _check_between_0_and_1(setting_name, setting_value):
    """Raise `SettingValidationError` unless ``0 <= setting_value <= 1``."""
    is_valid = 0 <= setting_value <= 1
    if not is_valid:
        raise SettingValidationError(
            '{} is not a valid value for {}. It can take values between 0 and '
            '1.'.format(
                setting_value,
                setting_name,
            )
        )


def check_settings(settings):
    """
    Check if provided settings are valid, if not it raises `SettingValidationError`.
    Only checks for the modified settings.

    Each entry of ``settings._mod_settings`` is checked in this order: the
    name must be known, the value must have exactly the expected type, the
    value must be one of the allowed ``values`` (when listed), and finally
    the setting's ``extra_check`` (when present) is run.
    """
    settings_values = {
        'DATE_ORDER': {
            'values': tuple(date_order_chart.keys()),
            'type': str,
        },
        'TIMEZONE': {
            # we don't check invalid Timezones as they raise an error
            'type': str,
        },
        'TO_TIMEZONE': {
            # It defaults to None, but it's not allowed to use it directly
            # "values" can take unlimited options
            'type': str
        },
        'RETURN_AS_TIMEZONE_AWARE': {
            # It defaults to 'default', but it's not allowed to use it directly
            'type': bool
        },
        'PREFER_DAY_OF_MONTH': {
            'values': ('current', 'first', 'last'),
            'type': str
        },
        'PREFER_DATES_FROM': {
            'values': ('current_period', 'past', 'future'),
            'type': str,
        },
        'RELATIVE_BASE': {
            # "values" can take unlimited options
            'type': datetime
        },
        'STRICT_PARSING': {
            'type': bool
        },
        'REQUIRE_PARTS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_require_part
        },
        'SKIP_TOKENS': {
            # "values" can take unlimited options
            'type': list,
        },
        'NORMALIZE': {
            'type': bool
        },
        'RETURN_TIME_AS_PERIOD': {
            'type': bool
        },
        'PARSERS': {
            # "values" covered by the 'extra_check'
            'type': list,
            'extra_check': _check_parsers
        },
        'FUZZY': {
            'type': bool
        },
        'PREFER_LOCALE_DATE_ORDER': {
            'type': bool
        },
        'DEFAULT_LANGUAGES': {
            'type': list,
            'extra_check': _check_default_languages
        },
        'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD': {
            'type': float,
            'extra_check': _check_between_0_and_1
        },
        'CACHE_SIZE_LIMIT': {
            'type': int,
        },
    }

    modified_settings = settings._mod_settings  # check only modified settings

    # check settings keys:
    for setting in modified_settings:
        if setting not in settings_values:
            raise SettingValidationError('"{}" is not a valid setting'.format(setting))

    for setting_name, setting_value in modified_settings.items():
        setting_type = type(setting_value)
        setting_props = settings_values[setting_name]

        # check type:
        # Exact type comparison, not isinstance(): subclasses are rejected,
        # so e.g. a bool is not accepted where an int is expected.
        if not setting_type == setting_props['type']:
            raise SettingValidationError(
                '"{}" must be "{}", not "{}".'.format(
                    setting_name,
                    setting_props['type'].__name__,
                    setting_type.__name__
                )
            )

        # check values:
        if setting_props.get('values') and setting_value not in setting_props['values']:
            raise SettingValidationError(
                '"{}" is not a valid value for "{}", it should be: "{}" or "{}"'.format(
                    setting_value,
                    setting_name,
                    '", "'.join(setting_props['values'][:-1]),
                    setting_props['values'][-1],
                )
            )

        # specific checks
        extra_check = setting_props.get('extra_check')
        if extra_check:
            extra_check(setting_name, setting_value)