# cython: infer_types=True, language_level=3, auto_pickle=False # # Cython Scanner # from __future__ import absolute_import import cython cython.declare(make_lexicon=object, lexicon=object, print_function=object, error=object, warning=object, os=object, platform=object) import os import platform from unicodedata import normalize from contextlib import contextmanager from .. import Utils from ..Plex.Scanners import Scanner from ..Plex.Errors import UnrecognizedInput from .Errors import error, warning, hold_errors, release_errors, CompileError from .Lexicon import any_string_prefix, make_lexicon, IDENT from .Future import print_function debug_scanner = 0 trace_scanner = 0 scanner_debug_flags = 0 scanner_dump_file = None lexicon = None def get_lexicon(): global lexicon if not lexicon: lexicon = make_lexicon() return lexicon #------------------------------------------------------------------ py_reserved_words = [ "global", "nonlocal", "def", "class", "print", "del", "pass", "break", "continue", "return", "raise", "import", "exec", "try", "except", "finally", "while", "if", "elif", "else", "for", "in", "assert", "and", "or", "not", "is", "lambda", "from", "yield", "with", ] pyx_reserved_words = py_reserved_words + [ "include", "ctypedef", "cdef", "cpdef", "cimport", "DEF", "IF", "ELIF", "ELSE" ] #------------------------------------------------------------------ class CompileTimeScope(object): def __init__(self, outer=None): self.entries = {} self.outer = outer def declare(self, name, value): self.entries[name] = value def update(self, other): self.entries.update(other) def lookup_here(self, name): return self.entries[name] def __contains__(self, name): return name in self.entries def lookup(self, name): try: return self.lookup_here(name) except KeyError: outer = self.outer if outer: return outer.lookup(name) else: raise def initial_compile_time_env(): benv = CompileTimeScope() names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE') for name, value in zip(names, platform.uname()): benv.declare(name, value) try: import __builtin__ as builtins except ImportError: import builtins names = ( 'False', 'True', 'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes', 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter', 'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len', 'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range', 'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str', 'sum', 'tuple', 'zip', ### defined below in a platform independent way # 'long', 'unicode', 'reduce', 'xrange' ) for name in names: try: benv.declare(name, getattr(builtins, name)) except AttributeError: # ignore, likely Py3 pass # Py2/3 adaptations from functools import reduce benv.declare('reduce', reduce) benv.declare('unicode', getattr(builtins, 'unicode', getattr(builtins, 'str'))) benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int'))) benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range'))) denv = CompileTimeScope(benv) return denv #------------------------------------------------------------------ class SourceDescriptor(object): """ A SourceDescriptor should be considered immutable. """ filename = None in_utility_code = False _file_type = 'pyx' _escaped_description = None _cmp_name = '' def __str__(self): assert False # To catch all places where a descriptor is used directly as a filename def set_file_type_from_name(self, filename): name, ext = os.path.splitext(filename) self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx' def is_cython_file(self): return self._file_type in ('pyx', 'pxd') def is_python_file(self): return self._file_type == 'py' def get_escaped_description(self): if self._escaped_description is None: esc_desc = \ self.get_description().encode('ASCII', 'replace').decode("ASCII") # Use forward slashes on Windows since these paths # will be used in the #line directives in the C/C++ files. self._escaped_description = esc_desc.replace('\\', '/') return self._escaped_description def __gt__(self, other): # this is only used to provide some sort of order try: return self._cmp_name > other._cmp_name except AttributeError: return False def __lt__(self, other): # this is only used to provide some sort of order try: return self._cmp_name < other._cmp_name except AttributeError: return False def __le__(self, other): # this is only used to provide some sort of order try: return self._cmp_name <= other._cmp_name except AttributeError: return False def __copy__(self): return self # immutable, no need to copy def __deepcopy__(self, memo): return self # immutable, no need to copy class FileSourceDescriptor(SourceDescriptor): """ Represents a code source. A code source is a more generic abstraction for a "filename" (as sometimes the code doesn't come from a file). Instances of code sources are passed to Scanner.__init__ as the optional name argument and will be passed back when asking for the position()-tuple. """ def __init__(self, filename, path_description=None): filename = Utils.decode_filename(filename) self.path_description = path_description or filename self.filename = filename # Prefer relative paths to current directory (which is most likely the project root) over absolute paths. workdir = os.path.abspath('.') + os.sep self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename self.set_file_type_from_name(filename) self._cmp_name = filename self._lines = {} def get_lines(self, encoding=None, error_handling=None): # we cache the lines only the second time this is called, in # order to save memory when they are only used once key = (encoding, error_handling) try: lines = self._lines[key] if lines is not None: return lines except KeyError: pass with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f: lines = list(f) if key in self._lines: self._lines[key] = lines else: # do not cache the first access, but remember that we # already read it once self._lines[key] = None return lines def get_description(self): try: return os.path.relpath(self.path_description) except ValueError: # path not under current directory => use complete file path return self.path_description def get_error_description(self): path = self.filename cwd = Utils.decode_filename(os.getcwd() + os.path.sep) if path.startswith(cwd): return path[len(cwd):] return path def get_filenametable_entry(self): return self.file_path def __eq__(self, other): return isinstance(other, FileSourceDescriptor) and self.filename == other.filename def __hash__(self): return hash(self.filename) def __repr__(self): return "" % self.filename class StringSourceDescriptor(SourceDescriptor): """ Instances of this class can be used instead of a filenames if the code originates from a string object. """ def __init__(self, name, code): self.name = name #self.set_file_type_from_name(name) self.codelines = [x + "\n" for x in code.split("\n")] self._cmp_name = name def get_lines(self, encoding=None, error_handling=None): if not encoding: return self.codelines else: return [line.encode(encoding, error_handling).decode(encoding) for line in self.codelines] def get_description(self): return self.name get_error_description = get_description def get_filenametable_entry(self): return "" def __hash__(self): return id(self) # Do not hash on the name, an identical string source should be the # same object (name is often defaulted in other places) # return hash(self.name) def __eq__(self, other): return isinstance(other, StringSourceDescriptor) and self.name == other.name def __repr__(self): return "" % self.name #------------------------------------------------------------------ class PyrexScanner(Scanner): # context Context Compilation context # included_files [string] Files included with 'include' statement # compile_time_env dict Environment for conditional compilation # compile_time_eval boolean In a true conditional compilation context # compile_time_expr boolean In a compile-time expression context # put_back_on_failure list or None If set, this records states so the tentatively_scan # contextmanager can restore it def __init__(self, file, filename, parent_scanner=None, scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None): Scanner.__init__(self, get_lexicon(), file, filename, initial_pos) if filename.is_python_file(): self.in_python_file = True keywords = py_reserved_words else: self.in_python_file = False keywords = pyx_reserved_words self.keywords = {keyword: keyword for keyword in keywords} self.async_enabled = 0 if parent_scanner: self.context = parent_scanner.context self.included_files = parent_scanner.included_files self.compile_time_env = parent_scanner.compile_time_env self.compile_time_eval = parent_scanner.compile_time_eval self.compile_time_expr = parent_scanner.compile_time_expr if parent_scanner.async_enabled: self.enter_async() else: self.context = context self.included_files = scope.included_files self.compile_time_env = initial_compile_time_env() self.compile_time_eval = 1 self.compile_time_expr = 0 if getattr(context.options, 'compile_time_env', None): self.compile_time_env.update(context.options.compile_time_env) self.parse_comments = parse_comments self.source_encoding = source_encoding self.trace = trace_scanner self.indentation_stack = [0] self.indentation_char = None self.bracket_nesting_level = 0 self.put_back_on_failure = None self.begin('INDENT') self.sy = '' self.next() def normalize_ident(self, text): try: text.encode('ascii') # really just name.isascii but supports Python 2 and 3 except UnicodeEncodeError: text = normalize('NFKC', text) self.produce(IDENT, text) def commentline(self, text): if self.parse_comments: self.produce('commentline', text) def strip_underscores(self, text, symbol): self.produce(symbol, text.replace('_', '')) def current_level(self): return self.indentation_stack[-1] def open_bracket_action(self, text): self.bracket_nesting_level += 1 return text def close_bracket_action(self, text): self.bracket_nesting_level -= 1 return text def newline_action(self, text): if self.bracket_nesting_level == 0: self.begin('INDENT') self.produce('NEWLINE', '') string_states = { "'": 'SQ_STRING', '"': 'DQ_STRING', "'''": 'TSQ_STRING', '"""': 'TDQ_STRING' } def begin_string_action(self, text): while text[:1] in any_string_prefix: text = text[1:] self.begin(self.string_states[text]) self.produce('BEGIN_STRING') def end_string_action(self, text): self.begin('') self.produce('END_STRING') def unclosed_string_action(self, text): self.end_string_action(text) self.error_at_scanpos("Unclosed string literal") def indentation_action(self, text): self.begin('') # Indentation within brackets should be ignored. #if self.bracket_nesting_level > 0: # return # Check that tabs and spaces are being used consistently. if text: c = text[0] #print "Scanner.indentation_action: indent with", repr(c) ### if self.indentation_char is None: self.indentation_char = c #print "Scanner.indentation_action: setting indent_char to", repr(c) else: if self.indentation_char != c: self.error_at_scanpos("Mixed use of tabs and spaces") if text.replace(c, "") != "": self.error_at_scanpos("Mixed use of tabs and spaces") # Figure out how many indents/dedents to do current_level = self.current_level() new_level = len(text) #print "Changing indent level from", current_level, "to", new_level ### if new_level == current_level: return elif new_level > current_level: #print "...pushing level", new_level ### self.indentation_stack.append(new_level) self.produce('INDENT', '') else: while new_level < self.current_level(): #print "...popping level", self.indentation_stack[-1] ### self.indentation_stack.pop() self.produce('DEDENT', '') #print "...current level now", self.current_level() ### if new_level != self.current_level(): self.error_at_scanpos("Inconsistent indentation") def eof_action(self, text): while len(self.indentation_stack) > 1: self.produce('DEDENT', '') self.indentation_stack.pop() self.produce('EOF', '') def next(self): try: sy, systring = self.read() except UnrecognizedInput: self.error_at_scanpos("Unrecognized character") return # just a marker, error() always raises if sy == IDENT: if systring in self.keywords: if systring == u'print' and print_function in self.context.future_directives: self.keywords.pop('print', None) elif systring == u'exec' and self.context.language_level >= 3: self.keywords.pop('exec', None) else: sy = self.keywords[systring] # intern systring = self.context.intern_ustring(systring) if self.put_back_on_failure is not None: self.put_back_on_failure.append((sy, systring, self.position())) self.sy = sy self.systring = systring if False: # debug_scanner: _, line, col = self.position() if not self.systring or self.sy == self.systring: t = self.sy else: t = "%s %s" % (self.sy, self.systring) print("--- %3d %2d %s" % (line, col, t)) def peek(self): saved = self.sy, self.systring saved_pos = self.position() self.next() next = self.sy, self.systring self.unread(self.sy, self.systring, self.position()) self.sy, self.systring = saved self.last_token_position_tuple = saved_pos return next def put_back(self, sy, systring, pos): self.unread(self.sy, self.systring, self.last_token_position_tuple) self.sy = sy self.systring = systring self.last_token_position_tuple = pos def error(self, message, pos=None, fatal=True): if pos is None: pos = self.position() if self.sy == 'INDENT': error(pos, "Possible inconsistent indentation") err = error(pos, message) if fatal: raise err def error_at_scanpos(self, message): # Like error(fatal=True), but gets the current scanning position rather than # the position of the last token read. pos = self.get_current_scan_pos() self.error(message, pos, True) def expect(self, what, message=None): if self.sy == what: self.next() else: self.expected(what, message) def expect_keyword(self, what, message=None): if self.sy == IDENT and self.systring == what: self.next() else: self.expected(what, message) def expected(self, what, message=None): if message: self.error(message) else: if self.sy == IDENT: found = self.systring else: found = self.sy self.error("Expected '%s', found '%s'" % (what, found)) def expect_indent(self): self.expect('INDENT', "Expected an increase in indentation level") def expect_dedent(self): self.expect('DEDENT', "Expected a decrease in indentation level") def expect_newline(self, message="Expected a newline", ignore_semicolon=False): # Expect either a newline or end of file useless_trailing_semicolon = None if ignore_semicolon and self.sy == ';': useless_trailing_semicolon = self.position() self.next() if self.sy != 'EOF': self.expect('NEWLINE', message) if useless_trailing_semicolon is not None: warning(useless_trailing_semicolon, "useless trailing semicolon") def enter_async(self): self.async_enabled += 1 if self.async_enabled == 1: self.keywords['async'] = 'async' self.keywords['await'] = 'await' def exit_async(self): assert self.async_enabled > 0 self.async_enabled -= 1 if not self.async_enabled: del self.keywords['await'] del self.keywords['async'] if self.sy in ('async', 'await'): self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy) @contextmanager @cython.locals(scanner=Scanner) def tentatively_scan(scanner): errors = hold_errors() try: put_back_on_failure = scanner.put_back_on_failure scanner.put_back_on_failure = [] initial_state = (scanner.sy, scanner.systring, scanner.position()) try: yield errors except CompileError as e: pass finally: if errors: if scanner.put_back_on_failure: for put_back in reversed(scanner.put_back_on_failure[:-1]): scanner.put_back(*put_back) # we need to restore the initial state too scanner.put_back(*initial_state) elif put_back_on_failure is not None: # the outer "tentatively_scan" block that we're in might still # want to undo this block put_back_on_failure.extend(scanner.put_back_on_failure) scanner.put_back_on_failure = put_back_on_failure finally: release_errors(ignore=True)