649 lines
23 KiB
Python
649 lines
23 KiB
Python
import calendar
|
|
import regex as re
|
|
|
|
import pytz
|
|
from io import StringIO
|
|
from collections import OrderedDict
|
|
from datetime import datetime
|
|
from datetime import timedelta
|
|
|
|
from dateparser.utils import set_correct_day_from_settings, \
|
|
get_last_day_of_month, get_previous_leap_year, get_next_leap_year, \
|
|
_get_missing_parts, get_timezone_from_tz_string
|
|
from dateparser.utils.strptime import strptime
|
|
|
|
|
|
# First run of non-digit characters in a string (see no_space_parser_eligibile).
NSP_COMPATIBLE = re.compile(r'\D+')
# "am" / "pm" marker inside a token.
MERIDIAN = re.compile(r'am|pm')
# One to six consecutive digits: a candidate fractional-second token.
MICROSECOND = re.compile(r'\d{1,6}')
# A string consisting of exactly eight digits.
EIGHT_DIGIT = re.compile(r'^\d{8}$')
# "H:MM" or "HH:MM" with hour 0-23 and minute 00-59.
HOUR_MINUTE_REGEX = re.compile(r'^([0-9]|0[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$')


def no_space_parser_eligibile(datestring):
    """Tell whether *datestring* may be handed to the no-spaces parser.

    The string qualifies when it has no non-digit character at all, or when
    the first run of non-digit characters is exactly a single ``':'``.

    :param datestring: the raw date string to inspect.
    :returns: ``True`` if eligible, ``False`` otherwise.
    """
    first_non_digit_run = NSP_COMPATIBLE.search(datestring)
    if first_non_digit_run is None:
        return True
    return first_non_digit_run.group() == ':'
|
|
|
|
|
|
def get_unresolved_attrs(parser_object):
    """Split the date components into those set on *parser_object* and those not.

    A component counts as resolved when the attribute exists on the object
    and is not ``None`` (so ``0`` is considered resolved).

    :param parser_object: any object that may carry ``year``, ``month`` and
        ``day`` attributes.
    :returns: ``(seen, unseen)`` -- two lists of component names, each kept
        in ``year``, ``month``, ``day`` order.
    """
    components = ('year', 'month', 'day')
    resolved = [
        name for name in components
        if getattr(parser_object, name, None) is not None
    ]
    unresolved = [name for name in components if name not in resolved]
    return resolved, unresolved
|
|
|
|
|
|
# Maps a DATE_ORDER code to the matching strptime directive sequence.
date_order_chart = {
    'DMY': '%d%m%y',
    'DYM': '%d%y%m',
    'MDY': '%m%d%y',
    'MYD': '%m%y%d',
    'YDM': '%y%d%m',
    'YMD': '%y%m%d',
}

_COMPONENT_NAMES = {'D': 'day', 'M': 'month', 'Y': 'year'}

# Component names for every order code, e.g. 'DMY' -> ['day', 'month', 'year'].
# Built once at import time instead of on every resolve_date_order() call.
_DATE_ORDER_COMPONENTS = {
    order: [_COMPONENT_NAMES[letter] for letter in order]
    for order in date_order_chart
}


def resolve_date_order(order, lst=None):
    """Translate a date-order code such as ``'MDY'``.

    :param order: one of the keys of ``date_order_chart``.
    :param lst: when truthy, return the component names as a list
        (``['month', 'day', 'year']``); otherwise return the strptime
        directive string (``'%m%d%y'``).
    :returns: a new list of component names, or a directive string.
    :raises KeyError: if *order* is not a known order code.
    """
    if lst:
        # Hand out a copy so callers can never mutate the shared table.
        return list(_DATE_ORDER_COMPONENTS[order])
    return date_order_chart[order]
|
|
|
|
|
|
def _parse_absolute(datestring, settings, tz=None):
    """Parse *datestring* with ``_parser.parse`` and return its result unchanged.

    :param datestring: the date string to parse.
    :param settings: settings object forwarded to the parser.
    :param tz: optional timezone forwarded to the parser.
    """
    return _parser.parse(datestring, settings, tz=tz)
|
|
|
|
|
|
def _parse_nospaces(datestring, settings, tz=None):
    """Parse *datestring* with ``_no_spaces_parser.parse``.

    :param datestring: the date string to parse.
    :param settings: settings object forwarded to the parser.
    :param tz: accepted for signature parity with ``_parse_absolute``; it is
        not forwarded to the no-spaces parser.
    """
    parsed = _no_spaces_parser.parse(datestring, settings)
    return parsed
|
|
|
|
|
|
class _time_parser:
|
|
time_directives = [
|
|
'%H:%M:%S',
|
|
'%I:%M:%S %p',
|
|
'%H:%M',
|
|
'%I:%M %p',
|
|
'%I %p',
|
|
'%H:%M:%S.%f',
|
|
'%I:%M:%S.%f %p',
|
|
'%H:%M %p'
|
|
]
|
|
|
|
def __call__(self, timestring):
|
|
_timestring = timestring
|
|
for directive in self.time_directives:
|
|
try:
|
|
return strptime(timestring.strip(), directive).time()
|
|
except ValueError:
|
|
pass
|
|
else:
|
|
raise ValueError('%s does not seem to be a valid time string' % _timestring)
|
|
|
|
|
|
time_parser = _time_parser()
|
|
|
|
|
|
class _no_spaces_parser:
    """Parser for date strings written without separators (digits, optionally ``':'``)."""

    _dateformats = [
        '%Y%m%d', '%Y%d%m', '%m%Y%d',
        '%m%d%Y', '%d%Y%m', '%d%m%Y',
        '%y%m%d', '%y%d%m', '%m%y%d',
        '%m%d%y', '%d%y%m', '%d%m%y'
    ]

    _preferred_formats = ['%Y%m%d%H%M', '%Y%m%d%H%M%S', '%Y%m%d%H%M%S.%f']

    # Formats tried, in this order, for strings of exactly eight digits.
    _preferred_formats_ordered_8_digit = ['%m%d%Y', '%d%m%Y', '%Y%m%d', '%Y%d%m', '%m%Y%d', '%d%Y%m']

    _timeformats = ['%H%M%S.%f', '%H%M%S', '%H%M', '%H']

    # Directives whose presence in a format implies the given period.
    period = {
        'day': ['%d', '%H', '%M', '%S'],
        'month': ['%m']
    }

    _default_order = resolve_date_order('MDY')

    def __init__(self, *args, **kwargs):
        """Build the candidate format lists, one per supported date order."""
        self._all = (
            self._dateformats
            + [x + y for x in self._dateformats for y in self._timeformats]
            + self._timeformats
        )

        orders = ('%m%d%y', '%m%y%d', '%y%m%d', '%y%d%m', '%d%m%y', '%d%y%m')
        self.date_formats = {order: self._formats_preferring(order) for order in orders}
        # Only the month-day-year order additionally tries the full
        # timestamp formats before anything else.
        self.date_formats['%m%d%y'] = self._preferred_formats + self.date_formats['%m%d%y']

    def _formats_preferring(self, order):
        """Return all formats, those starting with *order* (case-insensitively) first."""
        # The sort is stable, so formats keep their relative order inside
        # each of the two groups.
        return sorted(self._all, key=lambda fmt: fmt.lower().startswith(order), reverse=True)

    @classmethod
    def _get_period(cls, format_string):
        """Return ``'day'``, ``'month'`` or ``'year'`` for *format_string*.

        Period names are checked in alphabetical order and the first one
        with a directive present in the format wins; ``'year'`` is the
        fallback when none matches.
        """
        for pname, pdrv in sorted(cls.period.items(), key=lambda x: x[0]):
            for drv in pdrv:
                if drv in format_string:
                    return pname
        return 'year'

    @classmethod
    def _find_best_matching_date(cls, datestring):
        """Try the eight-digit formats and return ``(parsed, period)`` or ``None``.

        Only a parse whose year has four digits is accepted.
        """
        for fmt in cls._preferred_formats_ordered_8_digit:
            try:
                dt = strptime(datestring, fmt), cls._get_period(fmt)
                if len(str(dt[0].year)) == 4:
                    return dt
            except Exception:
                # Best effort: a format that does not fit is simply skipped.
                pass
        return None

    @classmethod
    def parse(cls, datestring, settings):
        """Parse a separator-free date string.

        :param datestring: the string to parse; any ``':'`` is removed first.
        :param settings: object providing ``DATE_ORDER`` plus whatever
            ``_check_strict_parsing`` reads.
        :returns: ``(parsed, period)`` where *parsed* is the ``strptime``
            result and *period* comes from ``_get_period``.
        :raises ValueError: if the string is not eligible, is empty, or no
            format matches.
        """
        if not no_space_parser_eligibile(datestring):
            raise ValueError('Unable to parse date from: %s' % datestring)

        datestring = datestring.replace(':', '')
        if not datestring:
            raise ValueError("Empty string")
        tokens = tokenizer(datestring)
        if settings.DATE_ORDER:
            order = resolve_date_order(settings.DATE_ORDER)
        else:
            order = cls._default_order
        if EIGHT_DIGIT.match(datestring):
            dt = cls._find_best_matching_date(datestring)
            if dt is not None:
                return dt
        nsp = cls()
        ambiguous_date = None
        for token, _ in tokens.tokenize():
            for fmt in nsp.date_formats[order]:
                try:
                    dt = strptime(token, fmt), cls._get_period(fmt)
                    if len(str(dt[0].year)) < 4:
                        # Keep as a fallback and look for a better match.
                        ambiguous_date = dt
                        continue

                    missing = _get_missing_parts(fmt)
                    _check_strict_parsing(missing, settings)
                    return dt
                except Exception:
                    # Covers both a failed parse and a strict-parsing
                    # rejection of this format: move on to the next one.
                    pass

        if ambiguous_date:
            return ambiguous_date
        raise ValueError('Unable to parse date from: %s' % datestring)
|
|
|
|
|
|
def _get_missing_error(missing):
|
|
return 'Fields missing from the date string: {}'.format(', '.join(missing))
|
|
|
|
|
|
def _check_strict_parsing(missing, settings):
|
|
if settings.STRICT_PARSING and missing:
|
|
raise ValueError(_get_missing_error(missing))
|
|
elif settings.REQUIRE_PARTS and missing:
|
|
errors = [part for part in settings.REQUIRE_PARTS if part in missing]
|
|
if errors:
|
|
raise ValueError(_get_missing_error(errors))
|
|
|
|
|
|
class _parser:
|
|
|
|
alpha_directives = OrderedDict([
|
|
('weekday', ['%A', '%a']),
|
|
('month', ['%B', '%b']),
|
|
])
|
|
|
|
num_directives = {
|
|
'month': ['%m'],
|
|
'day': ['%d'],
|
|
'year': ['%y', '%Y'],
|
|
}
|
|
|
|
def __init__(self, tokens, settings):
|
|
self.settings = settings
|
|
self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)]
|
|
self.filtered_tokens = [(t[0], t[1], i) for i, t in enumerate(self.tokens) if t[1] <= 1]
|
|
|
|
self.unset_tokens = []
|
|
|
|
self.day = None
|
|
self.month = None
|
|
self.year = None
|
|
self.time = None
|
|
|
|
self.auto_order = []
|
|
|
|
self._token_day = None
|
|
self._token_month = None
|
|
self._token_year = None
|
|
self._token_time = None
|
|
|
|
self.ordered_num_directives = OrderedDict(
|
|
(k, self.num_directives[k])
|
|
for k in (resolve_date_order(settings.DATE_ORDER, lst=True))
|
|
)
|
|
|
|
skip_index = []
|
|
skip_component = None
|
|
skip_tokens = ["t", "year", "hour", "minute"]
|
|
|
|
for index, token_type_original_index in enumerate(self.filtered_tokens):
|
|
|
|
if index in skip_index:
|
|
continue
|
|
|
|
token, type, original_index = token_type_original_index
|
|
|
|
if token in skip_tokens:
|
|
continue
|
|
|
|
if self.time is None:
|
|
meridian_index = index + 1
|
|
|
|
try:
|
|
# try case where hours and minutes are separated by a period. Example: 13.20.
|
|
_is_before_period = self.tokens[original_index + 1][0] == '.'
|
|
_is_after_period = original_index != 0 and self.tokens[original_index - 1][0] == '.'
|
|
|
|
if _is_before_period and not _is_after_period:
|
|
index_next_token = index + 1
|
|
next_token = self.filtered_tokens[index_next_token][0]
|
|
index_in_tokens_for_next_token = self.filtered_tokens[index_next_token][2]
|
|
|
|
next_token_is_last = index_next_token == len(self.filtered_tokens) - 1
|
|
if next_token_is_last or self.tokens[index_in_tokens_for_next_token + 1][0] != '.':
|
|
new_token = token + ':' + next_token
|
|
if re.match(HOUR_MINUTE_REGEX, new_token):
|
|
token = new_token
|
|
skip_index.append(index + 1)
|
|
meridian_index += 1
|
|
except Exception:
|
|
pass
|
|
|
|
try:
|
|
microsecond = MICROSECOND.search(self.filtered_tokens[index + 1][0]).group()
|
|
# Is after time token? raise ValueError if ':' can't be found:
|
|
token.index(":")
|
|
# Is after period? raise ValueError if '.' can't be found:
|
|
self.tokens[self.tokens.index((token, 0)) + 1][0].index('.')
|
|
except:
|
|
microsecond = None
|
|
|
|
if microsecond:
|
|
meridian_index += 1
|
|
|
|
try:
|
|
meridian = MERIDIAN.search(self.filtered_tokens[meridian_index][0]).group()
|
|
except:
|
|
meridian = None
|
|
|
|
if any([':' in token, meridian, microsecond]):
|
|
if meridian and not microsecond:
|
|
self._token_time = '%s %s' % (token, meridian)
|
|
skip_index.append(meridian_index)
|
|
elif microsecond and not meridian:
|
|
self._token_time = '%s.%s' % (token, microsecond)
|
|
skip_index.append(index + 1)
|
|
elif meridian and microsecond:
|
|
self._token_time = '%s.%s %s' % (token, microsecond, meridian)
|
|
skip_index.append(index + 1)
|
|
skip_index.append(meridian_index)
|
|
else:
|
|
self._token_time = token
|
|
self.time = lambda: time_parser(self._token_time)
|
|
continue
|
|
|
|
results = self._parse(type, token, skip_component=skip_component)
|
|
for res in results:
|
|
if len(token) == 4 and res[0] == 'year':
|
|
skip_component = 'year'
|
|
setattr(self, *res)
|
|
|
|
known, unknown = get_unresolved_attrs(self)
|
|
params = {}
|
|
for attr in known:
|
|
params.update({attr: getattr(self, attr)})
|
|
for attr in unknown:
|
|
for token, type, _ in self.unset_tokens:
|
|
if type == 0:
|
|
params.update({attr: int(token)})
|
|
setattr(self, '_token_%s' % attr, token)
|
|
setattr(self, attr, int(token))
|
|
|
|
def _get_period(self):
|
|
if self.settings.RETURN_TIME_AS_PERIOD:
|
|
if getattr(self, 'time', None):
|
|
return 'time'
|
|
|
|
for period in ['time', 'day']:
|
|
if getattr(self, period, None):
|
|
return 'day'
|
|
|
|
for period in ['month', 'year']:
|
|
if getattr(self, period, None):
|
|
return period
|
|
|
|
if self._results():
|
|
return 'day'
|
|
|
|
def _get_datetime_obj(self, **params):
|
|
try:
|
|
return datetime(**params)
|
|
except ValueError as e:
|
|
error_text = e.__str__()
|
|
error_msgs = ['day is out of range', 'day must be in']
|
|
if error_msgs[0] in error_text or error_msgs[1] in error_text:
|
|
if not(self._token_day or hasattr(self, '_token_weekday')):
|
|
# if day is not available put last day of the month
|
|
params['day'] = get_last_day_of_month(params['year'], params['month'])
|
|
return datetime(**params)
|
|
elif not self._token_year and params['day'] == 29 and params['month'] == 2 and \
|
|
not calendar.isleap(params['year']):
|
|
# fix the year when year is not present and it is 29 of February
|
|
params['year'] = self._get_correct_leap_year(self.settings.PREFER_DATES_FROM, params['year'])
|
|
return datetime(**params)
|
|
raise e
|
|
|
|
def _get_correct_leap_year(self, prefer_dates_from, current_year):
|
|
if prefer_dates_from == 'future':
|
|
return get_next_leap_year(current_year)
|
|
if prefer_dates_from == 'past':
|
|
return get_previous_leap_year(current_year)
|
|
|
|
# Default case ('current_period'): return closer leap year
|
|
next_leap_year = get_next_leap_year(current_year)
|
|
previous_leap_year = get_previous_leap_year(current_year)
|
|
next_leap_year_is_closer = next_leap_year - current_year < current_year - previous_leap_year
|
|
return next_leap_year if next_leap_year_is_closer else previous_leap_year
|
|
|
|
def _set_relative_base(self):
|
|
self.now = self.settings.RELATIVE_BASE
|
|
if not self.now:
|
|
self.now = datetime.utcnow()
|
|
|
|
def _get_datetime_obj_params(self):
|
|
if not self.now:
|
|
self._set_relative_base()
|
|
|
|
params = {
|
|
'day': self.day or self.now.day,
|
|
'month': self.month or self.now.month,
|
|
'year': self.year or self.now.year,
|
|
'hour': 0, 'minute': 0, 'second': 0, 'microsecond': 0,
|
|
}
|
|
return params
|
|
|
|
def _get_date_obj(self, token, directive):
|
|
return strptime(token, directive)
|
|
|
|
def _results(self):
|
|
missing = [
|
|
field for field in ('day', 'month', 'year')
|
|
if not getattr(self, field)
|
|
]
|
|
_check_strict_parsing(missing, self.settings)
|
|
self._set_relative_base()
|
|
|
|
time = self.time() if self.time is not None else None
|
|
params = self._get_datetime_obj_params()
|
|
|
|
if time:
|
|
params.update(dict(hour=time.hour,
|
|
minute=time.minute,
|
|
second=time.second,
|
|
microsecond=time.microsecond))
|
|
|
|
return self._get_datetime_obj(**params)
|
|
|
|
def _correct_for_time_frame(self, dateobj, tz):
|
|
days = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
|
|
|
|
token_weekday, _ = getattr(self, '_token_weekday', (None, None))
|
|
|
|
if token_weekday and not(self._token_year or self._token_month or self._token_day):
|
|
day_index = calendar.weekday(dateobj.year, dateobj.month, dateobj.day)
|
|
day = token_weekday[:3].lower()
|
|
steps = 0
|
|
if 'future' in self.settings.PREFER_DATES_FROM:
|
|
if days[day_index] == day:
|
|
steps = 7
|
|
else:
|
|
while days[day_index] != day:
|
|
day_index = (day_index + 1) % 7
|
|
steps += 1
|
|
delta = timedelta(days=steps)
|
|
else:
|
|
if days[day_index] == day:
|
|
if self.settings.PREFER_DATES_FROM == 'past':
|
|
steps = 7
|
|
else:
|
|
steps = 0
|
|
else:
|
|
while days[day_index] != day:
|
|
day_index -= 1
|
|
steps += 1
|
|
delta = timedelta(days=-steps)
|
|
|
|
dateobj = dateobj + delta
|
|
|
|
# NOTE: If this assert fires, self.now needs to be made offset-aware in a similar
|
|
# way that dateobj is temporarily made offset-aware.
|
|
assert not (self.now.tzinfo is None and dateobj.tzinfo is not None),\
|
|
"`self.now` doesn't have `tzinfo`. Review comment in code for details."
|
|
|
|
# Store the original dateobj values so that upon subsequent parsing everything is not
|
|
# treated as offset-aware if offset awareness is changed.
|
|
original_dateobj = dateobj
|
|
|
|
# Since date comparisons must be either offset-naive or offset-aware, normalize dateobj
|
|
# to be offset-aware if one or the other is already offset-aware.
|
|
if self.now.tzinfo is not None and dateobj.tzinfo is None:
|
|
dateobj = pytz.utc.localize(dateobj)
|
|
|
|
if self.month and not self.year:
|
|
try:
|
|
if self.now < dateobj:
|
|
if self.settings.PREFER_DATES_FROM == 'past':
|
|
dateobj = dateobj.replace(year=dateobj.year - 1)
|
|
else:
|
|
if self.settings.PREFER_DATES_FROM == 'future':
|
|
dateobj = dateobj.replace(year=dateobj.year + 1)
|
|
except ValueError as e:
|
|
if dateobj.day == 29 and dateobj.month == 2:
|
|
valid_year = self._get_correct_leap_year(
|
|
self.settings.PREFER_DATES_FROM, dateobj.year)
|
|
dateobj = dateobj.replace(year=valid_year)
|
|
else:
|
|
raise e
|
|
|
|
if self._token_year and len(self._token_year[0]) == 2:
|
|
if self.now < dateobj:
|
|
if 'past' in self.settings.PREFER_DATES_FROM:
|
|
dateobj = dateobj.replace(year=dateobj.year - 100)
|
|
else:
|
|
if 'future' in self.settings.PREFER_DATES_FROM:
|
|
dateobj = dateobj.replace(year=dateobj.year + 100)
|
|
|
|
if self._token_time and not any([self._token_year,
|
|
self._token_month,
|
|
self._token_day,
|
|
hasattr(self, '_token_weekday')]):
|
|
# Convert dateobj to utc time to compare with self.now
|
|
try:
|
|
tz = tz or get_timezone_from_tz_string(self.settings.TIMEZONE)
|
|
except pytz.UnknownTimeZoneError:
|
|
tz = None
|
|
|
|
if tz:
|
|
dateobj_time = (dateobj - tz.utcoffset(dateobj)).time()
|
|
else:
|
|
dateobj_time = dateobj.time()
|
|
if 'past' in self.settings.PREFER_DATES_FROM:
|
|
if self.now.time() < dateobj_time:
|
|
dateobj = dateobj + timedelta(days=-1)
|
|
if 'future' in self.settings.PREFER_DATES_FROM:
|
|
if self.now.time() > dateobj_time:
|
|
dateobj = dateobj + timedelta(days=1)
|
|
|
|
# Reset dateobj to the original value, thus removing any offset awareness that may
|
|
# have been set earlier.
|
|
dateobj = dateobj.replace(tzinfo=original_dateobj.tzinfo)
|
|
|
|
return dateobj
|
|
|
|
def _correct_for_day(self, dateobj):
|
|
if (
|
|
getattr(self, '_token_day', None)
|
|
or getattr(self, '_token_weekday', None)
|
|
or getattr(self, '_token_time', None)
|
|
):
|
|
return dateobj
|
|
|
|
dateobj = set_correct_day_from_settings(
|
|
dateobj, self.settings, current_day=self.now.day
|
|
)
|
|
return dateobj
|
|
|
|
@classmethod
|
|
def parse(cls, datestring, settings, tz=None):
|
|
tokens = tokenizer(datestring)
|
|
po = cls(tokens.tokenize(), settings)
|
|
dateobj = po._results()
|
|
|
|
# correction for past, future if applicable
|
|
dateobj = po._correct_for_time_frame(dateobj, tz)
|
|
|
|
# correction for preference of day: beginning, current, end
|
|
dateobj = po._correct_for_day(dateobj)
|
|
period = po._get_period()
|
|
|
|
return dateobj, period
|
|
|
|
def _parse(self, type, token, skip_component=None):
|
|
|
|
def set_and_return(token, type, component, dateobj, skip_date_order=False):
|
|
if not skip_date_order:
|
|
self.auto_order.append(component)
|
|
setattr(self, '_token_%s' % component, (token, type))
|
|
return [(component, getattr(dateobj, component))]
|
|
|
|
def parse_number(token, skip_component=None):
|
|
type = 0
|
|
|
|
for component, directives in self.ordered_num_directives.items():
|
|
if skip_component == component:
|
|
continue
|
|
for directive in directives:
|
|
try:
|
|
do = self._get_date_obj(token, directive)
|
|
prev_value = getattr(self, component, None)
|
|
if not prev_value:
|
|
return set_and_return(token, type, component, do)
|
|
else:
|
|
try:
|
|
prev_token, prev_type = getattr(self, '_token_%s' % component)
|
|
if prev_type == type:
|
|
do = self._get_date_obj(prev_token, directive)
|
|
except ValueError:
|
|
self.unset_tokens.append((prev_token, prev_type, component))
|
|
return set_and_return(token, type, component, do)
|
|
except ValueError:
|
|
pass
|
|
else:
|
|
raise ValueError('Unable to parse: %s' % token)
|
|
|
|
def parse_alpha(token, skip_component=None):
|
|
type = 1
|
|
|
|
for component, directives in self.alpha_directives.items():
|
|
if skip_component == component:
|
|
continue
|
|
for directive in directives:
|
|
try:
|
|
do = self._get_date_obj(token, directive)
|
|
prev_value = getattr(self, component, None)
|
|
if not prev_value:
|
|
return set_and_return(token, type, component, do, skip_date_order=True)
|
|
elif component == 'month':
|
|
index = self.auto_order.index('month')
|
|
self.auto_order[index] = 'day'
|
|
setattr(self, '_token_day', self._token_month)
|
|
setattr(self, '_token_month', (token, type))
|
|
return [(component, getattr(do, component)), ('day', prev_value)]
|
|
except:
|
|
pass
|
|
else:
|
|
raise ValueError('Unable to parse: %s' % token)
|
|
|
|
handlers = {0: parse_number, 1: parse_alpha}
|
|
return handlers[type](token, skip_component)
|
|
|
|
|
|
class tokenizer:
    """Split a string into maximal runs of digits, letters and other characters.

    Token types: ``0`` for digit runs (``':'`` counts as a digit), ``1`` for
    ASCII letter runs, ``2`` for everything else.
    """

    digits = '0123456789:'
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def _isletter(self, tkn):
        """Return whether *tkn* is contained in ``letters``."""
        return tkn in self.letters

    def _isdigit(self, tkn):
        """Return whether *tkn* is contained in ``digits``."""
        return tkn in self.digits

    def __init__(self, ds):
        """Wrap the string *ds* in a character stream."""
        self.instream = StringIO(ds)

    def _switch(self, chara, charb):
        """Classify *chara* and tell whether *charb* starts a new token.

        :returns: ``(type_of_chara, starts_new_token)``. A run of "other"
            characters ends only when a digit or a letter follows.
        """
        if self._isdigit(chara):
            return 0, not self._isdigit(charb)

        if self._isletter(chara):
            return 1, not self._isletter(charb)

        return 2, self._isdigit(charb) or self._isletter(charb)

    def tokenize(self):
        """Yield ``(token, type)`` pairs for the wrapped string.

        Consumes ``self.instream``. An empty input yields nothing.
        """
        token = ''

        # iter() with a sentinel stops at the first empty read (end of input).
        for nextchar in iter(lambda: self.instream.read(1), ''):
            if not token:
                token = nextchar
                continue

            kind, switch = self._switch(token[-1], nextchar)
            if switch:
                yield token, kind
                token = nextchar
            else:
                token += nextchar

        # Flush the final run; skipped for empty input, where there is no
        # last character to classify.
        if token:
            kind, _ = self._switch(token[-1], '')
            yield token, kind
|