# cython: auto_pickle=False """ Python Lexical Analyser Classes for building NFAs and DFAs """ from __future__ import absolute_import import cython from .Transitions import TransitionMap maxint = 2**31-1 # sentinel value if not cython.compiled: try: unichr except NameError: unichr = chr LOWEST_PRIORITY = -maxint class Machine(object): """A collection of Nodes representing an NFA or DFA.""" def __init__(self): self.states = [] # [Node] self.initial_states = {} # {(name, bol): Node} self.next_state_number = 1 def __del__(self): for state in self.states: state.destroy() def new_state(self): """Add a new state to the machine and return it.""" s = Node() n = self.next_state_number self.next_state_number = n + 1 s.number = n self.states.append(s) return s def new_initial_state(self, name): state = self.new_state() self.make_initial_state(name, state) return state def make_initial_state(self, name, state): self.initial_states[name] = state def get_initial_state(self, name): return self.initial_states[name] def dump(self, file): file.write("Plex.Machine:\n") if self.initial_states is not None: file.write(" Initial states:\n") for (name, state) in sorted(self.initial_states.items()): file.write(" '%s': %d\n" % (name, state.number)) for s in self.states: s.dump(file) class Node(object): """A state of an NFA or DFA.""" def __init__(self): # Preinitialise the list of empty transitions, because # the nfa-to-dfa algorithm needs it self.transitions = TransitionMap() # TransitionMap self.action_priority = LOWEST_PRIORITY # integer self.action = None # Action self.number = 0 # for debug output self.epsilon_closure = None # used by nfa_to_dfa() def destroy(self): self.transitions = None self.action = None self.epsilon_closure = None def add_transition(self, event, new_state): self.transitions.add(event, new_state) def link_to(self, state): """Add an epsilon-move from this state to another state.""" self.add_transition('', state) def set_action(self, action, priority): """Make this an accepting state with the given action. If there is already an action, choose the action with highest priority.""" if priority > self.action_priority: self.action = action self.action_priority = priority def get_action(self): return self.action def get_action_priority(self): return self.action_priority def is_accepting(self): return self.action is not None def __str__(self): return "State %d" % self.number def dump(self, file): # Header file.write(" State %d:\n" % self.number) # Transitions # self.dump_transitions(file) self.transitions.dump(file) # Action action = self.action priority = self.action_priority if action is not None: file.write(" %s [priority %d]\n" % (action, priority)) def __lt__(self, other): return self.number < other.number def __hash__(self): # Prevent overflowing hash values due to arbitrarily large unsigned addresses. return id(self) & maxint class FastMachine(object): """ FastMachine is a deterministic machine represented in a way that allows fast scanning. """ def __init__(self): self.initial_states = {} # {state_name:state} self.states = [] # [state] where state = {event:state, 'else':state, 'action':Action} self.next_number = 1 # for debugging self.new_state_template = { '': None, 'bol': None, 'eol': None, 'eof': None, 'else': None } def __del__(self): for state in self.states: state.clear() def new_state(self, action=None): number = self.next_number self.next_number = number + 1 result = self.new_state_template.copy() result['number'] = number result['action'] = action self.states.append(result) return result def make_initial_state(self, name, state): self.initial_states[name] = state @cython.locals(code0=cython.int, code1=cython.int, maxint=cython.int, state=dict) def add_transitions(self, state, event, new_state, maxint=maxint): if type(event) is tuple: code0, code1 = event if code0 == -maxint: state['else'] = new_state elif code1 != maxint: while code0 < code1: state[unichr(code0)] = new_state code0 += 1 else: state[event] = new_state def get_initial_state(self, name): return self.initial_states[name] def dump(self, file): file.write("Plex.FastMachine:\n") file.write(" Initial states:\n") for name, state in sorted(self.initial_states.items()): file.write(" %s: %s\n" % (repr(name), state['number'])) for state in self.states: self.dump_state(state, file) def dump_state(self, state, file): # Header file.write(" State %d:\n" % state['number']) # Transitions self.dump_transitions(state, file) # Action action = state['action'] if action is not None: file.write(" %s\n" % action) def dump_transitions(self, state, file): chars_leading_to_state = {} special_to_state = {} for (c, s) in state.items(): if len(c) == 1: chars = chars_leading_to_state.get(id(s), None) if chars is None: chars = [] chars_leading_to_state[id(s)] = chars chars.append(c) elif len(c) <= 4: special_to_state[c] = s ranges_to_state = {} for state in self.states: char_list = chars_leading_to_state.get(id(state), None) if char_list: ranges = self.chars_to_ranges(char_list) ranges_to_state[ranges] = state for ranges in sorted(ranges_to_state): key = self.ranges_to_string(ranges) state = ranges_to_state[ranges] file.write(" %s --> State %d\n" % (key, state['number'])) for key in ('bol', 'eol', 'eof', 'else'): state = special_to_state.get(key, None) if state: file.write(" %s --> State %d\n" % (key, state['number'])) @cython.locals(char_list=list, i=cython.Py_ssize_t, n=cython.Py_ssize_t, c1=cython.long, c2=cython.long) def chars_to_ranges(self, char_list): char_list.sort() i = 0 n = len(char_list) result = [] while i < n: c1 = ord(char_list[i]) c2 = c1 i += 1 while i < n and ord(char_list[i]) == c2 + 1: i += 1 c2 += 1 result.append((chr(c1), chr(c2))) return tuple(result) def ranges_to_string(self, range_list): return ','.join(map(self.range_to_string, range_list)) def range_to_string(self, range_tuple): (c1, c2) = range_tuple if c1 == c2: return repr(c1) else: return "%s..%s" % (repr(c1), repr(c2))