1951 lines
72 KiB
Python
1951 lines
72 KiB
Python
#------------------------------------------------------------------------------
|
|
# pycparser: c_parser.py
|
|
#
|
|
# CParser class: Parser and AST builder for the C language
|
|
#
|
|
# Eli Bendersky [https://eli.thegreenplace.net/]
|
|
# License: BSD
|
|
#------------------------------------------------------------------------------
|
|
from .ply import yacc
|
|
|
|
from . import c_ast
|
|
from .c_lexer import CLexer
|
|
from .plyparser import PLYParser, ParseError, parameterized, template
|
|
from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
|
|
|
|
|
|
@template
|
|
class CParser(PLYParser):
|
|
def __init__(
|
|
self,
|
|
lex_optimize=True,
|
|
lexer=CLexer,
|
|
lextab='pycparser.lextab',
|
|
yacc_optimize=True,
|
|
yacctab='pycparser.yacctab',
|
|
yacc_debug=False,
|
|
taboutputdir=''):
|
|
""" Create a new CParser.
|
|
|
|
Some arguments for controlling the debug/optimization
|
|
level of the parser are provided. The defaults are
|
|
tuned for release/performance mode.
|
|
The simple rules for using them are:
|
|
*) When tweaking CParser/CLexer, set these to False
|
|
*) When releasing a stable parser, set to True
|
|
|
|
lex_optimize:
|
|
Set to False when you're modifying the lexer.
|
|
Otherwise, changes in the lexer won't be used, if
|
|
some lextab.py file exists.
|
|
When releasing with a stable lexer, set to True
|
|
to save the re-generation of the lexer table on
|
|
each run.
|
|
|
|
lexer:
|
|
Set this parameter to define the lexer to use if
|
|
you're not using the default CLexer.
|
|
|
|
lextab:
|
|
Points to the lex table that's used for optimized
|
|
mode. Only if you're modifying the lexer and want
|
|
some tests to avoid re-generating the table, make
|
|
this point to a local lex table file (that's been
|
|
earlier generated with lex_optimize=True)
|
|
|
|
yacc_optimize:
|
|
Set to False when you're modifying the parser.
|
|
Otherwise, changes in the parser won't be used, if
|
|
some parsetab.py file exists.
|
|
When releasing with a stable parser, set to True
|
|
to save the re-generation of the parser table on
|
|
each run.
|
|
|
|
yacctab:
|
|
Points to the yacc table that's used for optimized
|
|
mode. Only if you're modifying the parser, make
|
|
this point to a local yacc table file
|
|
|
|
yacc_debug:
|
|
Generate a parser.out file that explains how yacc
|
|
built the parsing table from the grammar.
|
|
|
|
taboutputdir:
|
|
Set this parameter to control the location of generated
|
|
lextab and yacctab files.
|
|
"""
|
|
self.clex = lexer(
|
|
error_func=self._lex_error_func,
|
|
on_lbrace_func=self._lex_on_lbrace_func,
|
|
on_rbrace_func=self._lex_on_rbrace_func,
|
|
type_lookup_func=self._lex_type_lookup_func)
|
|
|
|
self.clex.build(
|
|
optimize=lex_optimize,
|
|
lextab=lextab,
|
|
outputdir=taboutputdir)
|
|
self.tokens = self.clex.tokens
|
|
|
|
rules_with_opt = [
|
|
'abstract_declarator',
|
|
'assignment_expression',
|
|
'declaration_list',
|
|
'declaration_specifiers_no_type',
|
|
'designation',
|
|
'expression',
|
|
'identifier_list',
|
|
'init_declarator_list',
|
|
'id_init_declarator_list',
|
|
'initializer_list',
|
|
'parameter_type_list',
|
|
'block_item_list',
|
|
'type_qualifier_list',
|
|
'struct_declarator_list'
|
|
]
|
|
|
|
for rule in rules_with_opt:
|
|
self._create_opt_rule(rule)
|
|
|
|
self.cparser = yacc.yacc(
|
|
module=self,
|
|
start='translation_unit_or_empty',
|
|
debug=yacc_debug,
|
|
optimize=yacc_optimize,
|
|
tabmodule=yacctab,
|
|
outputdir=taboutputdir)
|
|
|
|
# Stack of scopes for keeping track of symbols. _scope_stack[-1] is
|
|
# the current (topmost) scope. Each scope is a dictionary that
|
|
# specifies whether a name is a type. If _scope_stack[n][name] is
|
|
# True, 'name' is currently a type in the scope. If it's False,
|
|
# 'name' is used in the scope but not as a type (for instance, if we
|
|
# saw: int name;
|
|
# If 'name' is not a key in _scope_stack[n] then 'name' was not defined
|
|
# in this scope at all.
|
|
self._scope_stack = [dict()]
|
|
|
|
# Keeps track of the last token given to yacc (the lookahead token)
|
|
self._last_yielded_token = None
|
|
|
|
def parse(self, text, filename='', debug=False):
|
|
""" Parses C code and returns an AST.
|
|
|
|
text:
|
|
A string containing the C source code
|
|
|
|
filename:
|
|
Name of the file being parsed (for meaningful
|
|
error messages)
|
|
|
|
debug:
|
|
Debug flag to YACC
|
|
"""
|
|
self.clex.filename = filename
|
|
self.clex.reset_lineno()
|
|
self._scope_stack = [dict()]
|
|
self._last_yielded_token = None
|
|
return self.cparser.parse(
|
|
input=text,
|
|
lexer=self.clex,
|
|
debug=debug)
|
|
|
|
######################-- PRIVATE --######################
|
|
|
|
def _push_scope(self):
|
|
self._scope_stack.append(dict())
|
|
|
|
def _pop_scope(self):
|
|
assert len(self._scope_stack) > 1
|
|
self._scope_stack.pop()
|
|
|
|
def _add_typedef_name(self, name, coord):
|
|
""" Add a new typedef name (ie a TYPEID) to the current scope
|
|
"""
|
|
if not self._scope_stack[-1].get(name, True):
|
|
self._parse_error(
|
|
"Typedef %r previously declared as non-typedef "
|
|
"in this scope" % name, coord)
|
|
self._scope_stack[-1][name] = True
|
|
|
|
def _add_identifier(self, name, coord):
|
|
""" Add a new object, function, or enum member name (ie an ID) to the
|
|
current scope
|
|
"""
|
|
if self._scope_stack[-1].get(name, False):
|
|
self._parse_error(
|
|
"Non-typedef %r previously declared as typedef "
|
|
"in this scope" % name, coord)
|
|
self._scope_stack[-1][name] = False
|
|
|
|
def _is_type_in_scope(self, name):
|
|
""" Is *name* a typedef-name in the current scope?
|
|
"""
|
|
for scope in reversed(self._scope_stack):
|
|
# If name is an identifier in this scope it shadows typedefs in
|
|
# higher scopes.
|
|
in_scope = scope.get(name)
|
|
if in_scope is not None: return in_scope
|
|
return False
|
|
|
|
def _lex_error_func(self, msg, line, column):
|
|
self._parse_error(msg, self._coord(line, column))
|
|
|
|
def _lex_on_lbrace_func(self):
|
|
self._push_scope()
|
|
|
|
def _lex_on_rbrace_func(self):
|
|
self._pop_scope()
|
|
|
|
def _lex_type_lookup_func(self, name):
|
|
""" Looks up types that were previously defined with
|
|
typedef.
|
|
Passed to the lexer for recognizing identifiers that
|
|
are types.
|
|
"""
|
|
is_type = self._is_type_in_scope(name)
|
|
return is_type
|
|
|
|
def _get_yacc_lookahead_token(self):
|
|
""" We need access to yacc's lookahead token in certain cases.
|
|
This is the last token yacc requested from the lexer, so we
|
|
ask the lexer.
|
|
"""
|
|
return self.clex.last_token
|
|
|
|
# To understand what's going on here, read sections A.8.5 and
|
|
# A.8.6 of K&R2 very carefully.
|
|
#
|
|
# A C type consists of a basic type declaration, with a list
|
|
# of modifiers. For example:
|
|
#
|
|
# int *c[5];
|
|
#
|
|
# The basic declaration here is 'int c', and the pointer and
|
|
# the array are the modifiers.
|
|
#
|
|
# Basic declarations are represented by TypeDecl (from module c_ast) and the
|
|
# modifiers are FuncDecl, PtrDecl and ArrayDecl.
|
|
#
|
|
# The standard states that whenever a new modifier is parsed, it should be
|
|
# added to the end of the list of modifiers. For example:
|
|
#
|
|
# K&R2 A.8.6.2: Array Declarators
|
|
#
|
|
# In a declaration T D where D has the form
|
|
# D1 [constant-expression-opt]
|
|
# and the type of the identifier in the declaration T D1 is
|
|
# "type-modifier T", the type of the
|
|
# identifier of D is "type-modifier array of T"
|
|
#
|
|
# This is what this method does. The declarator it receives
|
|
# can be a list of declarators ending with TypeDecl. It
|
|
# tacks the modifier to the end of this list, just before
|
|
# the TypeDecl.
|
|
#
|
|
# Additionally, the modifier may be a list itself. This is
|
|
# useful for pointers, that can come as a chain from the rule
|
|
# p_pointer. In this case, the whole modifier list is spliced
|
|
# into the new location.
|
|
def _type_modify_decl(self, decl, modifier):
|
|
""" Tacks a type modifier on a declarator, and returns
|
|
the modified declarator.
|
|
|
|
Note: the declarator and modifier may be modified
|
|
"""
|
|
#~ print '****'
|
|
#~ decl.show(offset=3)
|
|
#~ modifier.show(offset=3)
|
|
#~ print '****'
|
|
|
|
modifier_head = modifier
|
|
modifier_tail = modifier
|
|
|
|
# The modifier may be a nested list. Reach its tail.
|
|
while modifier_tail.type:
|
|
modifier_tail = modifier_tail.type
|
|
|
|
# If the decl is a basic type, just tack the modifier onto it.
|
|
if isinstance(decl, c_ast.TypeDecl):
|
|
modifier_tail.type = decl
|
|
return modifier
|
|
else:
|
|
# Otherwise, the decl is a list of modifiers. Reach
|
|
# its tail and splice the modifier onto the tail,
|
|
# pointing to the underlying basic type.
|
|
decl_tail = decl
|
|
|
|
while not isinstance(decl_tail.type, c_ast.TypeDecl):
|
|
decl_tail = decl_tail.type
|
|
|
|
modifier_tail.type = decl_tail.type
|
|
decl_tail.type = modifier_head
|
|
return decl
|
|
|
|
# Due to the order in which declarators are constructed,
|
|
# they have to be fixed in order to look like a normal AST.
|
|
#
|
|
# When a declaration arrives from syntax construction, it has
|
|
# these problems:
|
|
# * The innermost TypeDecl has no type (because the basic
|
|
# type is only known at the uppermost declaration level)
|
|
# * The declaration has no variable name, since that is saved
|
|
# in the innermost TypeDecl
|
|
# * The typename of the declaration is a list of type
|
|
# specifiers, and not a node. Here, basic identifier types
|
|
# should be separated from more complex types like enums
|
|
# and structs.
|
|
#
|
|
# This method fixes these problems.
|
|
def _fix_decl_name_type(self, decl, typename):
|
|
""" Fixes a declaration. Modifies decl.
|
|
"""
|
|
# Reach the underlying basic type
|
|
#
|
|
type = decl
|
|
while not isinstance(type, c_ast.TypeDecl):
|
|
type = type.type
|
|
|
|
decl.name = type.declname
|
|
type.quals = decl.quals[:]
|
|
|
|
# The typename is a list of types. If any type in this
|
|
# list isn't an IdentifierType, it must be the only
|
|
# type in the list (it's illegal to declare "int enum ..")
|
|
# If all the types are basic, they're collected in the
|
|
# IdentifierType holder.
|
|
for tn in typename:
|
|
if not isinstance(tn, c_ast.IdentifierType):
|
|
if len(typename) > 1:
|
|
self._parse_error(
|
|
"Invalid multiple types specified", tn.coord)
|
|
else:
|
|
type.type = tn
|
|
return decl
|
|
|
|
if not typename:
|
|
# Functions default to returning int
|
|
#
|
|
if not isinstance(decl.type, c_ast.FuncDecl):
|
|
self._parse_error(
|
|
"Missing type in declaration", decl.coord)
|
|
type.type = c_ast.IdentifierType(
|
|
['int'],
|
|
coord=decl.coord)
|
|
else:
|
|
# At this point, we know that typename is a list of IdentifierType
|
|
# nodes. Concatenate all the names into a single list.
|
|
#
|
|
type.type = c_ast.IdentifierType(
|
|
[name for id in typename for name in id.names],
|
|
coord=typename[0].coord)
|
|
return decl
|
|
|
|
def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
|
|
""" Declaration specifiers are represented by a dictionary
|
|
with the entries:
|
|
* qual: a list of type qualifiers
|
|
* storage: a list of storage type qualifiers
|
|
* type: a list of type specifiers
|
|
* function: a list of function specifiers
|
|
* alignment: a list of alignment specifiers
|
|
|
|
This method is given a declaration specifier, and a
|
|
new specifier of a given kind.
|
|
If `append` is True, the new specifier is added to the end of
|
|
the specifiers list, otherwise it's added at the beginning.
|
|
Returns the declaration specifier, with the new
|
|
specifier incorporated.
|
|
"""
|
|
spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[])
|
|
|
|
if append:
|
|
spec[kind].append(newspec)
|
|
else:
|
|
spec[kind].insert(0, newspec)
|
|
|
|
return spec
|
|
|
|
def _build_declarations(self, spec, decls, typedef_namespace=False):
|
|
""" Builds a list of declarations all sharing the given specifiers.
|
|
If typedef_namespace is true, each declared name is added
|
|
to the "typedef namespace", which also includes objects,
|
|
functions, and enum constants.
|
|
"""
|
|
is_typedef = 'typedef' in spec['storage']
|
|
declarations = []
|
|
|
|
# Bit-fields are allowed to be unnamed.
|
|
if decls[0].get('bitsize') is not None:
|
|
pass
|
|
|
|
# When redeclaring typedef names as identifiers in inner scopes, a
|
|
# problem can occur where the identifier gets grouped into
|
|
# spec['type'], leaving decl as None. This can only occur for the
|
|
# first declarator.
|
|
elif decls[0]['decl'] is None:
|
|
if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
|
|
not self._is_type_in_scope(spec['type'][-1].names[0]):
|
|
coord = '?'
|
|
for t in spec['type']:
|
|
if hasattr(t, 'coord'):
|
|
coord = t.coord
|
|
break
|
|
self._parse_error('Invalid declaration', coord)
|
|
|
|
# Make this look as if it came from "direct_declarator:ID"
|
|
decls[0]['decl'] = c_ast.TypeDecl(
|
|
declname=spec['type'][-1].names[0],
|
|
type=None,
|
|
quals=None,
|
|
align=spec['alignment'],
|
|
coord=spec['type'][-1].coord)
|
|
# Remove the "new" type's name from the end of spec['type']
|
|
del spec['type'][-1]
|
|
|
|
# A similar problem can occur where the declaration ends up looking
|
|
# like an abstract declarator. Give it a name if this is the case.
|
|
elif not isinstance(decls[0]['decl'], (
|
|
c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
|
|
decls_0_tail = decls[0]['decl']
|
|
while not isinstance(decls_0_tail, c_ast.TypeDecl):
|
|
decls_0_tail = decls_0_tail.type
|
|
if decls_0_tail.declname is None:
|
|
decls_0_tail.declname = spec['type'][-1].names[0]
|
|
del spec['type'][-1]
|
|
|
|
for decl in decls:
|
|
assert decl['decl'] is not None
|
|
if is_typedef:
|
|
declaration = c_ast.Typedef(
|
|
name=None,
|
|
quals=spec['qual'],
|
|
storage=spec['storage'],
|
|
type=decl['decl'],
|
|
coord=decl['decl'].coord)
|
|
else:
|
|
declaration = c_ast.Decl(
|
|
name=None,
|
|
quals=spec['qual'],
|
|
align=spec['alignment'],
|
|
storage=spec['storage'],
|
|
funcspec=spec['function'],
|
|
type=decl['decl'],
|
|
init=decl.get('init'),
|
|
bitsize=decl.get('bitsize'),
|
|
coord=decl['decl'].coord)
|
|
|
|
if isinstance(declaration.type, (
|
|
c_ast.Enum, c_ast.Struct, c_ast.Union,
|
|
c_ast.IdentifierType)):
|
|
fixed_decl = declaration
|
|
else:
|
|
fixed_decl = self._fix_decl_name_type(declaration, spec['type'])
|
|
|
|
# Add the type name defined by typedef to a
|
|
# symbol table (for usage in the lexer)
|
|
if typedef_namespace:
|
|
if is_typedef:
|
|
self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
|
|
else:
|
|
self._add_identifier(fixed_decl.name, fixed_decl.coord)
|
|
|
|
fixed_decl = fix_atomic_specifiers(fixed_decl)
|
|
declarations.append(fixed_decl)
|
|
|
|
return declarations
|
|
|
|
def _build_function_definition(self, spec, decl, param_decls, body):
|
|
""" Builds a function definition.
|
|
"""
|
|
if 'typedef' in spec['storage']:
|
|
self._parse_error("Invalid typedef", decl.coord)
|
|
|
|
declaration = self._build_declarations(
|
|
spec=spec,
|
|
decls=[dict(decl=decl, init=None)],
|
|
typedef_namespace=True)[0]
|
|
|
|
return c_ast.FuncDef(
|
|
decl=declaration,
|
|
param_decls=param_decls,
|
|
body=body,
|
|
coord=decl.coord)
|
|
|
|
def _select_struct_union_class(self, token):
|
|
""" Given a token (either STRUCT or UNION), selects the
|
|
appropriate AST class.
|
|
"""
|
|
if token == 'struct':
|
|
return c_ast.Struct
|
|
else:
|
|
return c_ast.Union
|
|
|
|
##
|
|
## Precedence and associativity of operators
|
|
##
|
|
# If this changes, c_generator.CGenerator.precedence_map needs to change as
|
|
# well
|
|
precedence = (
|
|
('left', 'LOR'),
|
|
('left', 'LAND'),
|
|
('left', 'OR'),
|
|
('left', 'XOR'),
|
|
('left', 'AND'),
|
|
('left', 'EQ', 'NE'),
|
|
('left', 'GT', 'GE', 'LT', 'LE'),
|
|
('left', 'RSHIFT', 'LSHIFT'),
|
|
('left', 'PLUS', 'MINUS'),
|
|
('left', 'TIMES', 'DIVIDE', 'MOD')
|
|
)
|
|
|
|
##
|
|
## Grammar productions
|
|
## Implementation of the BNF defined in K&R2 A.13
|
|
##
|
|
|
|
# Wrapper around a translation unit, to allow for empty input.
|
|
# Not strictly part of the C99 Grammar, but useful in practice.
|
|
def p_translation_unit_or_empty(self, p):
|
|
""" translation_unit_or_empty : translation_unit
|
|
| empty
|
|
"""
|
|
if p[1] is None:
|
|
p[0] = c_ast.FileAST([])
|
|
else:
|
|
p[0] = c_ast.FileAST(p[1])
|
|
|
|
def p_translation_unit_1(self, p):
|
|
""" translation_unit : external_declaration
|
|
"""
|
|
# Note: external_declaration is already a list
|
|
p[0] = p[1]
|
|
|
|
def p_translation_unit_2(self, p):
|
|
""" translation_unit : translation_unit external_declaration
|
|
"""
|
|
p[1].extend(p[2])
|
|
p[0] = p[1]
|
|
|
|
# Declarations always come as lists (because they can be
|
|
# several in one line), so we wrap the function definition
|
|
# into a list as well, to make the return value of
|
|
# external_declaration homogeneous.
|
|
def p_external_declaration_1(self, p):
|
|
""" external_declaration : function_definition
|
|
"""
|
|
p[0] = [p[1]]
|
|
|
|
def p_external_declaration_2(self, p):
|
|
""" external_declaration : declaration
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_external_declaration_3(self, p):
|
|
""" external_declaration : pp_directive
|
|
| pppragma_directive
|
|
"""
|
|
p[0] = [p[1]]
|
|
|
|
def p_external_declaration_4(self, p):
|
|
""" external_declaration : SEMI
|
|
"""
|
|
p[0] = []
|
|
|
|
def p_external_declaration_5(self, p):
|
|
""" external_declaration : static_assert
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_static_assert_declaration(self, p):
|
|
""" static_assert : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
|
|
| _STATIC_ASSERT LPAREN constant_expression RPAREN
|
|
"""
|
|
if len(p) == 5:
|
|
p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
|
|
else:
|
|
p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]
|
|
|
|
def p_pp_directive(self, p):
|
|
""" pp_directive : PPHASH
|
|
"""
|
|
self._parse_error('Directives not supported yet',
|
|
self._token_coord(p, 1))
|
|
|
|
# This encompasses two types of C99-compatible pragmas:
|
|
# - The #pragma directive:
|
|
# # pragma character_sequence
|
|
# - The _Pragma unary operator:
|
|
# _Pragma ( " string_literal " )
|
|
def p_pppragma_directive(self, p):
|
|
""" pppragma_directive : PPPRAGMA
|
|
| PPPRAGMA PPPRAGMASTR
|
|
| _PRAGMA LPAREN unified_string_literal RPAREN
|
|
"""
|
|
if len(p) == 5:
|
|
p[0] = c_ast.Pragma(p[3], self._token_coord(p, 2))
|
|
elif len(p) == 3:
|
|
p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
|
|
else:
|
|
p[0] = c_ast.Pragma("", self._token_coord(p, 1))
|
|
|
|
def p_pppragma_directive_list(self, p):
|
|
""" pppragma_directive_list : pppragma_directive
|
|
| pppragma_directive_list pppragma_directive
|
|
"""
|
|
p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
|
|
|
|
# In function definitions, the declarator can be followed by
|
|
# a declaration list, for old "K&R style" function definitios.
|
|
def p_function_definition_1(self, p):
|
|
""" function_definition : id_declarator declaration_list_opt compound_statement
|
|
"""
|
|
# no declaration specifiers - 'int' becomes the default type
|
|
spec = dict(
|
|
qual=[],
|
|
alignment=[],
|
|
storage=[],
|
|
type=[c_ast.IdentifierType(['int'],
|
|
coord=self._token_coord(p, 1))],
|
|
function=[])
|
|
|
|
p[0] = self._build_function_definition(
|
|
spec=spec,
|
|
decl=p[1],
|
|
param_decls=p[2],
|
|
body=p[3])
|
|
|
|
def p_function_definition_2(self, p):
|
|
""" function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
|
|
"""
|
|
spec = p[1]
|
|
|
|
p[0] = self._build_function_definition(
|
|
spec=spec,
|
|
decl=p[2],
|
|
param_decls=p[3],
|
|
body=p[4])
|
|
|
|
# Note, according to C18 A.2.2 6.7.10 static_assert-declaration _Static_assert
|
|
# is a declaration, not a statement. We additionally recognise it as a statement
|
|
# to fix parsing of _Static_assert inside the functions.
|
|
#
|
|
def p_statement(self, p):
|
|
""" statement : labeled_statement
|
|
| expression_statement
|
|
| compound_statement
|
|
| selection_statement
|
|
| iteration_statement
|
|
| jump_statement
|
|
| pppragma_directive
|
|
| static_assert
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
# A pragma is generally considered a decorator rather than an actual
|
|
# statement. Still, for the purposes of analyzing an abstract syntax tree of
|
|
# C code, pragma's should not be ignored and were previously treated as a
|
|
# statement. This presents a problem for constructs that take a statement
|
|
# such as labeled_statements, selection_statements, and
|
|
# iteration_statements, causing a misleading structure in the AST. For
|
|
# example, consider the following C code.
|
|
#
|
|
# for (int i = 0; i < 3; i++)
|
|
# #pragma omp critical
|
|
# sum += 1;
|
|
#
|
|
# This code will compile and execute "sum += 1;" as the body of the for
|
|
# loop. Previous implementations of PyCParser would render the AST for this
|
|
# block of code as follows:
|
|
#
|
|
# For:
|
|
# DeclList:
|
|
# Decl: i, [], [], []
|
|
# TypeDecl: i, []
|
|
# IdentifierType: ['int']
|
|
# Constant: int, 0
|
|
# BinaryOp: <
|
|
# ID: i
|
|
# Constant: int, 3
|
|
# UnaryOp: p++
|
|
# ID: i
|
|
# Pragma: omp critical
|
|
# Assignment: +=
|
|
# ID: sum
|
|
# Constant: int, 1
|
|
#
|
|
# This AST misleadingly takes the Pragma as the body of the loop and the
|
|
# assignment then becomes a sibling of the loop.
|
|
#
|
|
# To solve edge cases like these, the pragmacomp_or_statement rule groups
|
|
# a pragma and its following statement (which would otherwise be orphaned)
|
|
# using a compound block, effectively turning the above code into:
|
|
#
|
|
# for (int i = 0; i < 3; i++) {
|
|
# #pragma omp critical
|
|
# sum += 1;
|
|
# }
|
|
def p_pragmacomp_or_statement(self, p):
|
|
""" pragmacomp_or_statement : pppragma_directive_list statement
|
|
| statement
|
|
"""
|
|
if len(p) == 3:
|
|
p[0] = c_ast.Compound(
|
|
block_items=p[1]+[p[2]],
|
|
coord=self._token_coord(p, 1))
|
|
else:
|
|
p[0] = p[1]
|
|
|
|
# In C, declarations can come several in a line:
|
|
# int x, *px, romulo = 5;
|
|
#
|
|
# However, for the AST, we will split them to separate Decl
|
|
# nodes.
|
|
#
|
|
# This rule splits its declarations and always returns a list
|
|
# of Decl nodes, even if it's one element long.
|
|
#
|
|
def p_decl_body(self, p):
|
|
""" decl_body : declaration_specifiers init_declarator_list_opt
|
|
| declaration_specifiers_no_type id_init_declarator_list_opt
|
|
"""
|
|
spec = p[1]
|
|
|
|
# p[2] (init_declarator_list_opt) is either a list or None
|
|
#
|
|
if p[2] is None:
|
|
# By the standard, you must have at least one declarator unless
|
|
# declaring a structure tag, a union tag, or the members of an
|
|
# enumeration.
|
|
#
|
|
ty = spec['type']
|
|
s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
|
|
if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
|
|
decls = [c_ast.Decl(
|
|
name=None,
|
|
quals=spec['qual'],
|
|
align=spec['alignment'],
|
|
storage=spec['storage'],
|
|
funcspec=spec['function'],
|
|
type=ty[0],
|
|
init=None,
|
|
bitsize=None,
|
|
coord=ty[0].coord)]
|
|
|
|
# However, this case can also occur on redeclared identifiers in
|
|
# an inner scope. The trouble is that the redeclared type's name
|
|
# gets grouped into declaration_specifiers; _build_declarations
|
|
# compensates for this.
|
|
#
|
|
else:
|
|
decls = self._build_declarations(
|
|
spec=spec,
|
|
decls=[dict(decl=None, init=None)],
|
|
typedef_namespace=True)
|
|
|
|
else:
|
|
decls = self._build_declarations(
|
|
spec=spec,
|
|
decls=p[2],
|
|
typedef_namespace=True)
|
|
|
|
p[0] = decls
|
|
|
|
# The declaration has been split to a decl_body sub-rule and
|
|
# SEMI, because having them in a single rule created a problem
|
|
# for defining typedefs.
|
|
#
|
|
# If a typedef line was directly followed by a line using the
|
|
# type defined with the typedef, the type would not be
|
|
# recognized. This is because to reduce the declaration rule,
|
|
# the parser's lookahead asked for the token after SEMI, which
|
|
# was the type from the next line, and the lexer had no chance
|
|
# to see the updated type symbol table.
|
|
#
|
|
# Splitting solves this problem, because after seeing SEMI,
|
|
# the parser reduces decl_body, which actually adds the new
|
|
# type into the table to be seen by the lexer before the next
|
|
# line is reached.
|
|
def p_declaration(self, p):
|
|
""" declaration : decl_body SEMI
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
# Since each declaration is a list of declarations, this
|
|
# rule will combine all the declarations and return a single
|
|
# list
|
|
#
|
|
def p_declaration_list(self, p):
|
|
""" declaration_list : declaration
|
|
| declaration_list declaration
|
|
"""
|
|
p[0] = p[1] if len(p) == 2 else p[1] + p[2]
|
|
|
|
# To know when declaration-specifiers end and declarators begin,
|
|
# we require declaration-specifiers to have at least one
|
|
# type-specifier, and disallow typedef-names after we've seen any
|
|
# type-specifier. These are both required by the spec.
|
|
#
|
|
def p_declaration_specifiers_no_type_1(self, p):
|
|
""" declaration_specifiers_no_type : type_qualifier declaration_specifiers_no_type_opt
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
|
|
|
|
def p_declaration_specifiers_no_type_2(self, p):
|
|
""" declaration_specifiers_no_type : storage_class_specifier declaration_specifiers_no_type_opt
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
|
|
|
|
def p_declaration_specifiers_no_type_3(self, p):
|
|
""" declaration_specifiers_no_type : function_specifier declaration_specifiers_no_type_opt
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
|
|
|
|
# Without this, `typedef _Atomic(T) U` will parse incorrectly because the
|
|
# _Atomic qualifier will match, instead of the specifier.
|
|
def p_declaration_specifiers_no_type_4(self, p):
|
|
""" declaration_specifiers_no_type : atomic_specifier declaration_specifiers_no_type_opt
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
|
|
|
|
def p_declaration_specifiers_no_type_5(self, p):
|
|
""" declaration_specifiers_no_type : alignment_specifier declaration_specifiers_no_type_opt
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')
|
|
|
|
def p_declaration_specifiers_1(self, p):
|
|
""" declaration_specifiers : declaration_specifiers type_qualifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
|
|
|
|
def p_declaration_specifiers_2(self, p):
|
|
""" declaration_specifiers : declaration_specifiers storage_class_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)
|
|
|
|
def p_declaration_specifiers_3(self, p):
|
|
""" declaration_specifiers : declaration_specifiers function_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)
|
|
|
|
def p_declaration_specifiers_4(self, p):
|
|
""" declaration_specifiers : declaration_specifiers type_specifier_no_typeid
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
|
|
|
|
def p_declaration_specifiers_5(self, p):
|
|
""" declaration_specifiers : type_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(None, p[1], 'type')
|
|
|
|
def p_declaration_specifiers_6(self, p):
|
|
""" declaration_specifiers : declaration_specifiers_no_type type_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
|
|
|
|
def p_declaration_specifiers_7(self, p):
|
|
""" declaration_specifiers : declaration_specifiers alignment_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)
|
|
|
|
def p_storage_class_specifier(self, p):
|
|
""" storage_class_specifier : AUTO
|
|
| REGISTER
|
|
| STATIC
|
|
| EXTERN
|
|
| TYPEDEF
|
|
| _THREAD_LOCAL
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_function_specifier(self, p):
|
|
""" function_specifier : INLINE
|
|
| _NORETURN
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_type_specifier_no_typeid(self, p):
|
|
""" type_specifier_no_typeid : VOID
|
|
| _BOOL
|
|
| CHAR
|
|
| SHORT
|
|
| INT
|
|
| LONG
|
|
| FLOAT
|
|
| DOUBLE
|
|
| _COMPLEX
|
|
| SIGNED
|
|
| UNSIGNED
|
|
| __INT128
|
|
"""
|
|
p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
|
|
|
|
def p_type_specifier(self, p):
|
|
""" type_specifier : typedef_name
|
|
| enum_specifier
|
|
| struct_or_union_specifier
|
|
| type_specifier_no_typeid
|
|
| atomic_specifier
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
# See section 6.7.2.4 of the C11 standard.
|
|
def p_atomic_specifier(self, p):
|
|
""" atomic_specifier : _ATOMIC LPAREN type_name RPAREN
|
|
"""
|
|
typ = p[3]
|
|
typ.quals.append('_Atomic')
|
|
p[0] = typ
|
|
|
|
def p_type_qualifier(self, p):
|
|
""" type_qualifier : CONST
|
|
| RESTRICT
|
|
| VOLATILE
|
|
| _ATOMIC
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_init_declarator_list(self, p):
|
|
""" init_declarator_list : init_declarator
|
|
| init_declarator_list COMMA init_declarator
|
|
"""
|
|
p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
|
|
|
|
# Returns a {decl=<declarator> : init=<initializer>} dictionary
|
|
# If there's no initializer, uses None
|
|
#
|
|
def p_init_declarator(self, p):
|
|
""" init_declarator : declarator
|
|
| declarator EQUALS initializer
|
|
"""
|
|
p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
|
|
|
|
def p_id_init_declarator_list(self, p):
|
|
""" id_init_declarator_list : id_init_declarator
|
|
| id_init_declarator_list COMMA init_declarator
|
|
"""
|
|
p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
|
|
|
|
def p_id_init_declarator(self, p):
|
|
""" id_init_declarator : id_declarator
|
|
| id_declarator EQUALS initializer
|
|
"""
|
|
p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
|
|
|
|
# Require at least one type specifier in a specifier-qualifier-list
|
|
#
|
|
def p_specifier_qualifier_list_1(self, p):
|
|
""" specifier_qualifier_list : specifier_qualifier_list type_specifier_no_typeid
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
|
|
|
|
def p_specifier_qualifier_list_2(self, p):
|
|
""" specifier_qualifier_list : specifier_qualifier_list type_qualifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
|
|
|
|
def p_specifier_qualifier_list_3(self, p):
|
|
""" specifier_qualifier_list : type_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(None, p[1], 'type')
|
|
|
|
def p_specifier_qualifier_list_4(self, p):
|
|
""" specifier_qualifier_list : type_qualifier_list type_specifier
|
|
"""
|
|
p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])
|
|
|
|
def p_specifier_qualifier_list_5(self, p):
|
|
""" specifier_qualifier_list : alignment_specifier
|
|
"""
|
|
p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])
|
|
|
|
def p_specifier_qualifier_list_6(self, p):
|
|
""" specifier_qualifier_list : specifier_qualifier_list alignment_specifier
|
|
"""
|
|
p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')
|
|
|
|
# TYPEID is allowed here (and in other struct/enum related tag names), because
|
|
# struct/enum tags reside in their own namespace and can be named the same as types
|
|
#
|
|
def p_struct_or_union_specifier_1(self, p):
|
|
""" struct_or_union_specifier : struct_or_union ID
|
|
| struct_or_union TYPEID
|
|
"""
|
|
klass = self._select_struct_union_class(p[1])
|
|
# None means no list of members
|
|
p[0] = klass(
|
|
name=p[2],
|
|
decls=None,
|
|
coord=self._token_coord(p, 2))
|
|
|
|
def p_struct_or_union_specifier_2(self, p):
|
|
""" struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
|
|
| struct_or_union brace_open brace_close
|
|
"""
|
|
klass = self._select_struct_union_class(p[1])
|
|
if len(p) == 4:
|
|
# Empty sequence means an empty list of members
|
|
p[0] = klass(
|
|
name=None,
|
|
decls=[],
|
|
coord=self._token_coord(p, 2))
|
|
else:
|
|
p[0] = klass(
|
|
name=None,
|
|
decls=p[3],
|
|
coord=self._token_coord(p, 2))
|
|
|
|
|
|
def p_struct_or_union_specifier_3(self, p):
|
|
""" struct_or_union_specifier : struct_or_union ID brace_open struct_declaration_list brace_close
|
|
| struct_or_union ID brace_open brace_close
|
|
| struct_or_union TYPEID brace_open struct_declaration_list brace_close
|
|
| struct_or_union TYPEID brace_open brace_close
|
|
"""
|
|
klass = self._select_struct_union_class(p[1])
|
|
if len(p) == 5:
|
|
# Empty sequence means an empty list of members
|
|
p[0] = klass(
|
|
name=p[2],
|
|
decls=[],
|
|
coord=self._token_coord(p, 2))
|
|
else:
|
|
p[0] = klass(
|
|
name=p[2],
|
|
decls=p[4],
|
|
coord=self._token_coord(p, 2))
|
|
|
|
def p_struct_or_union(self, p):
|
|
""" struct_or_union : STRUCT
|
|
| UNION
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
# Combine all declarations into a single list
|
|
#
|
|
def p_struct_declaration_list(self, p):
|
|
""" struct_declaration_list : struct_declaration
|
|
| struct_declaration_list struct_declaration
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = p[1] or []
|
|
else:
|
|
p[0] = p[1] + (p[2] or [])
|
|
|
|
def p_struct_declaration_1(self, p):
|
|
""" struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
|
|
"""
|
|
spec = p[1]
|
|
assert 'typedef' not in spec['storage']
|
|
|
|
if p[2] is not None:
|
|
decls = self._build_declarations(
|
|
spec=spec,
|
|
decls=p[2])
|
|
|
|
elif len(spec['type']) == 1:
|
|
# Anonymous struct/union, gcc extension, C1x feature.
|
|
# Although the standard only allows structs/unions here, I see no
|
|
# reason to disallow other types since some compilers have typedefs
|
|
# here, and pycparser isn't about rejecting all invalid code.
|
|
#
|
|
node = spec['type'][0]
|
|
if isinstance(node, c_ast.Node):
|
|
decl_type = node
|
|
else:
|
|
decl_type = c_ast.IdentifierType(node)
|
|
|
|
decls = self._build_declarations(
|
|
spec=spec,
|
|
decls=[dict(decl=decl_type)])
|
|
|
|
else:
|
|
# Structure/union members can have the same names as typedefs.
|
|
# The trouble is that the member's name gets grouped into
|
|
# specifier_qualifier_list; _build_declarations compensates.
|
|
#
|
|
decls = self._build_declarations(
|
|
spec=spec,
|
|
decls=[dict(decl=None, init=None)])
|
|
|
|
p[0] = decls
|
|
|
|
def p_struct_declaration_2(self, p):
|
|
""" struct_declaration : SEMI
|
|
"""
|
|
p[0] = None
|
|
|
|
def p_struct_declaration_3(self, p):
|
|
""" struct_declaration : pppragma_directive
|
|
"""
|
|
p[0] = [p[1]]
|
|
|
|
def p_struct_declarator_list(self, p):
|
|
""" struct_declarator_list : struct_declarator
|
|
| struct_declarator_list COMMA struct_declarator
|
|
"""
|
|
p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
|
|
|
|
# struct_declarator passes up a dict with the keys: decl (for
|
|
# the underlying declarator) and bitsize (for the bitsize)
|
|
#
|
|
def p_struct_declarator_1(self, p):
|
|
""" struct_declarator : declarator
|
|
"""
|
|
p[0] = {'decl': p[1], 'bitsize': None}
|
|
|
|
def p_struct_declarator_2(self, p):
|
|
""" struct_declarator : declarator COLON constant_expression
|
|
| COLON constant_expression
|
|
"""
|
|
if len(p) > 3:
|
|
p[0] = {'decl': p[1], 'bitsize': p[3]}
|
|
else:
|
|
p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}
|
|
|
|
def p_enum_specifier_1(self, p):
|
|
""" enum_specifier : ENUM ID
|
|
| ENUM TYPEID
|
|
"""
|
|
p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))
|
|
|
|
def p_enum_specifier_2(self, p):
|
|
""" enum_specifier : ENUM brace_open enumerator_list brace_close
|
|
"""
|
|
p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))
|
|
|
|
def p_enum_specifier_3(self, p):
|
|
""" enum_specifier : ENUM ID brace_open enumerator_list brace_close
|
|
| ENUM TYPEID brace_open enumerator_list brace_close
|
|
"""
|
|
p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))
|
|
|
|
def p_enumerator_list(self, p):
|
|
""" enumerator_list : enumerator
|
|
| enumerator_list COMMA
|
|
| enumerator_list COMMA enumerator
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
|
|
elif len(p) == 3:
|
|
p[0] = p[1]
|
|
else:
|
|
p[1].enumerators.append(p[3])
|
|
p[0] = p[1]
|
|
|
|
def p_alignment_specifier(self, p):
|
|
""" alignment_specifier : _ALIGNAS LPAREN type_name RPAREN
|
|
| _ALIGNAS LPAREN constant_expression RPAREN
|
|
"""
|
|
p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))
|
|
|
|
def p_enumerator(self, p):
|
|
""" enumerator : ID
|
|
| ID EQUALS constant_expression
|
|
"""
|
|
if len(p) == 2:
|
|
enumerator = c_ast.Enumerator(
|
|
p[1], None,
|
|
self._token_coord(p, 1))
|
|
else:
|
|
enumerator = c_ast.Enumerator(
|
|
p[1], p[3],
|
|
self._token_coord(p, 1))
|
|
self._add_identifier(enumerator.name, enumerator.coord)
|
|
|
|
p[0] = enumerator
|
|
|
|
def p_declarator(self, p):
|
|
""" declarator : id_declarator
|
|
| typeid_declarator
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_xxx_declarator_1(self, p):
|
|
""" xxx_declarator : direct_xxx_declarator
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_xxx_declarator_2(self, p):
|
|
""" xxx_declarator : pointer direct_xxx_declarator
|
|
"""
|
|
p[0] = self._type_modify_decl(p[2], p[1])
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_direct_xxx_declarator_1(self, p):
|
|
""" direct_xxx_declarator : yyy
|
|
"""
|
|
p[0] = c_ast.TypeDecl(
|
|
declname=p[1],
|
|
type=None,
|
|
quals=None,
|
|
align=None,
|
|
coord=self._token_coord(p, 1))
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
|
|
def p_direct_xxx_declarator_2(self, p):
|
|
""" direct_xxx_declarator : LPAREN xxx_declarator RPAREN
|
|
"""
|
|
p[0] = p[2]
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_direct_xxx_declarator_3(self, p):
|
|
""" direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
|
|
"""
|
|
quals = (p[3] if len(p) > 5 else []) or []
|
|
# Accept dimension qualifiers
|
|
# Per C99 6.7.5.3 p7
|
|
arr = c_ast.ArrayDecl(
|
|
type=None,
|
|
dim=p[4] if len(p) > 5 else p[3],
|
|
dim_quals=quals,
|
|
coord=p[1].coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_direct_xxx_declarator_4(self, p):
|
|
""" direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
|
|
| direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
|
|
"""
|
|
# Using slice notation for PLY objects doesn't work in Python 3 for the
|
|
# version of PLY embedded with pycparser; see PLY Google Code issue 30.
|
|
# Work around that here by listing the two elements separately.
|
|
listed_quals = [item if isinstance(item, list) else [item]
|
|
for item in [p[3],p[4]]]
|
|
dim_quals = [qual for sublist in listed_quals for qual in sublist
|
|
if qual is not None]
|
|
arr = c_ast.ArrayDecl(
|
|
type=None,
|
|
dim=p[5],
|
|
dim_quals=dim_quals,
|
|
coord=p[1].coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
|
|
|
|
# Special for VLAs
|
|
#
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_direct_xxx_declarator_5(self, p):
|
|
""" direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
|
|
"""
|
|
arr = c_ast.ArrayDecl(
|
|
type=None,
|
|
dim=c_ast.ID(p[4], self._token_coord(p, 4)),
|
|
dim_quals=p[3] if p[3] is not None else [],
|
|
coord=p[1].coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
|
|
|
|
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
|
|
def p_direct_xxx_declarator_6(self, p):
|
|
""" direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN
|
|
| direct_xxx_declarator LPAREN identifier_list_opt RPAREN
|
|
"""
|
|
func = c_ast.FuncDecl(
|
|
args=p[3],
|
|
type=None,
|
|
coord=p[1].coord)
|
|
|
|
# To see why _get_yacc_lookahead_token is needed, consider:
|
|
# typedef char TT;
|
|
# void foo(int TT) { TT = 10; }
|
|
# Outside the function, TT is a typedef, but inside (starting and
|
|
# ending with the braces) it's a parameter. The trouble begins with
|
|
# yacc's lookahead token. We don't know if we're declaring or
|
|
# defining a function until we see LBRACE, but if we wait for yacc to
|
|
# trigger a rule on that token, then TT will have already been read
|
|
# and incorrectly interpreted as TYPEID. We need to add the
|
|
# parameters to the scope the moment the lexer sees LBRACE.
|
|
#
|
|
if self._get_yacc_lookahead_token().type == "LBRACE":
|
|
if func.args is not None:
|
|
for param in func.args.params:
|
|
if isinstance(param, c_ast.EllipsisParam): break
|
|
self._add_identifier(param.name, param.coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=func)
|
|
|
|
def p_pointer(self, p):
|
|
""" pointer : TIMES type_qualifier_list_opt
|
|
| TIMES type_qualifier_list_opt pointer
|
|
"""
|
|
coord = self._token_coord(p, 1)
|
|
# Pointer decls nest from inside out. This is important when different
|
|
# levels have different qualifiers. For example:
|
|
#
|
|
# char * const * p;
|
|
#
|
|
# Means "pointer to const pointer to char"
|
|
#
|
|
# While:
|
|
#
|
|
# char ** const p;
|
|
#
|
|
# Means "const pointer to pointer to char"
|
|
#
|
|
# So when we construct PtrDecl nestings, the leftmost pointer goes in
|
|
# as the most nested type.
|
|
nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
|
|
if len(p) > 3:
|
|
tail_type = p[3]
|
|
while tail_type.type is not None:
|
|
tail_type = tail_type.type
|
|
tail_type.type = nested_type
|
|
p[0] = p[3]
|
|
else:
|
|
p[0] = nested_type
|
|
|
|
def p_type_qualifier_list(self, p):
|
|
""" type_qualifier_list : type_qualifier
|
|
| type_qualifier_list type_qualifier
|
|
"""
|
|
p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
|
|
|
|
def p_parameter_type_list(self, p):
|
|
""" parameter_type_list : parameter_list
|
|
| parameter_list COMMA ELLIPSIS
|
|
"""
|
|
if len(p) > 2:
|
|
p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))
|
|
|
|
p[0] = p[1]
|
|
|
|
def p_parameter_list(self, p):
|
|
""" parameter_list : parameter_declaration
|
|
| parameter_list COMMA parameter_declaration
|
|
"""
|
|
if len(p) == 2: # single parameter
|
|
p[0] = c_ast.ParamList([p[1]], p[1].coord)
|
|
else:
|
|
p[1].params.append(p[3])
|
|
p[0] = p[1]
|
|
|
|
# From ISO/IEC 9899:TC2, 6.7.5.3.11:
|
|
# "If, in a parameter declaration, an identifier can be treated either
|
|
# as a typedef name or as a parameter name, it shall be taken as a
|
|
# typedef name."
|
|
#
|
|
# Inside a parameter declaration, once we've reduced declaration specifiers,
|
|
# if we shift in an LPAREN and see a TYPEID, it could be either an abstract
|
|
# declarator or a declarator nested inside parens. This rule tells us to
|
|
# always treat it as an abstract declarator. Therefore, we only accept
|
|
# `id_declarator`s and `typeid_noparen_declarator`s.
|
|
def p_parameter_declaration_1(self, p):
|
|
""" parameter_declaration : declaration_specifiers id_declarator
|
|
| declaration_specifiers typeid_noparen_declarator
|
|
"""
|
|
spec = p[1]
|
|
if not spec['type']:
|
|
spec['type'] = [c_ast.IdentifierType(['int'],
|
|
coord=self._token_coord(p, 1))]
|
|
p[0] = self._build_declarations(
|
|
spec=spec,
|
|
decls=[dict(decl=p[2])])[0]
|
|
|
|
def p_parameter_declaration_2(self, p):
|
|
""" parameter_declaration : declaration_specifiers abstract_declarator_opt
|
|
"""
|
|
spec = p[1]
|
|
if not spec['type']:
|
|
spec['type'] = [c_ast.IdentifierType(['int'],
|
|
coord=self._token_coord(p, 1))]
|
|
|
|
# Parameters can have the same names as typedefs. The trouble is that
|
|
# the parameter's name gets grouped into declaration_specifiers, making
|
|
# it look like an old-style declaration; compensate.
|
|
#
|
|
if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
|
|
self._is_type_in_scope(spec['type'][-1].names[0]):
|
|
decl = self._build_declarations(
|
|
spec=spec,
|
|
decls=[dict(decl=p[2], init=None)])[0]
|
|
|
|
# This truly is an old-style parameter declaration
|
|
#
|
|
else:
|
|
decl = c_ast.Typename(
|
|
name='',
|
|
quals=spec['qual'],
|
|
align=None,
|
|
type=p[2] or c_ast.TypeDecl(None, None, None, None),
|
|
coord=self._token_coord(p, 2))
|
|
typename = spec['type']
|
|
decl = self._fix_decl_name_type(decl, typename)
|
|
|
|
p[0] = decl
|
|
|
|
def p_identifier_list(self, p):
|
|
""" identifier_list : identifier
|
|
| identifier_list COMMA identifier
|
|
"""
|
|
if len(p) == 2: # single parameter
|
|
p[0] = c_ast.ParamList([p[1]], p[1].coord)
|
|
else:
|
|
p[1].params.append(p[3])
|
|
p[0] = p[1]
|
|
|
|
def p_initializer_1(self, p):
|
|
""" initializer : assignment_expression
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_initializer_2(self, p):
|
|
""" initializer : brace_open initializer_list_opt brace_close
|
|
| brace_open initializer_list COMMA brace_close
|
|
"""
|
|
if p[2] is None:
|
|
p[0] = c_ast.InitList([], self._token_coord(p, 1))
|
|
else:
|
|
p[0] = p[2]
|
|
|
|
def p_initializer_list(self, p):
|
|
""" initializer_list : designation_opt initializer
|
|
| initializer_list COMMA designation_opt initializer
|
|
"""
|
|
if len(p) == 3: # single initializer
|
|
init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
|
|
p[0] = c_ast.InitList([init], p[2].coord)
|
|
else:
|
|
init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
|
|
p[1].exprs.append(init)
|
|
p[0] = p[1]
|
|
|
|
def p_designation(self, p):
|
|
""" designation : designator_list EQUALS
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
# Designators are represented as a list of nodes, in the order in which
|
|
# they're written in the code.
|
|
#
|
|
def p_designator_list(self, p):
|
|
""" designator_list : designator
|
|
| designator_list designator
|
|
"""
|
|
p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
|
|
|
|
def p_designator(self, p):
|
|
""" designator : LBRACKET constant_expression RBRACKET
|
|
| PERIOD identifier
|
|
"""
|
|
p[0] = p[2]
|
|
|
|
def p_type_name(self, p):
|
|
""" type_name : specifier_qualifier_list abstract_declarator_opt
|
|
"""
|
|
typename = c_ast.Typename(
|
|
name='',
|
|
quals=p[1]['qual'][:],
|
|
align=None,
|
|
type=p[2] or c_ast.TypeDecl(None, None, None, None),
|
|
coord=self._token_coord(p, 2))
|
|
|
|
p[0] = self._fix_decl_name_type(typename, p[1]['type'])
|
|
|
|
def p_abstract_declarator_1(self, p):
|
|
""" abstract_declarator : pointer
|
|
"""
|
|
dummytype = c_ast.TypeDecl(None, None, None, None)
|
|
p[0] = self._type_modify_decl(
|
|
decl=dummytype,
|
|
modifier=p[1])
|
|
|
|
def p_abstract_declarator_2(self, p):
|
|
""" abstract_declarator : pointer direct_abstract_declarator
|
|
"""
|
|
p[0] = self._type_modify_decl(p[2], p[1])
|
|
|
|
def p_abstract_declarator_3(self, p):
|
|
""" abstract_declarator : direct_abstract_declarator
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
# Creating and using direct_abstract_declarator_opt here
|
|
# instead of listing both direct_abstract_declarator and the
|
|
# lack of it in the beginning of _1 and _2 caused two
|
|
# shift/reduce errors.
|
|
#
|
|
def p_direct_abstract_declarator_1(self, p):
|
|
""" direct_abstract_declarator : LPAREN abstract_declarator RPAREN """
|
|
p[0] = p[2]
|
|
|
|
def p_direct_abstract_declarator_2(self, p):
|
|
""" direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
|
|
"""
|
|
arr = c_ast.ArrayDecl(
|
|
type=None,
|
|
dim=p[3],
|
|
dim_quals=[],
|
|
coord=p[1].coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
|
|
|
|
def p_direct_abstract_declarator_3(self, p):
|
|
""" direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
|
|
"""
|
|
quals = (p[2] if len(p) > 4 else []) or []
|
|
p[0] = c_ast.ArrayDecl(
|
|
type=c_ast.TypeDecl(None, None, None, None),
|
|
dim=p[3] if len(p) > 4 else p[2],
|
|
dim_quals=quals,
|
|
coord=self._token_coord(p, 1))
|
|
|
|
def p_direct_abstract_declarator_4(self, p):
|
|
""" direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET
|
|
"""
|
|
arr = c_ast.ArrayDecl(
|
|
type=None,
|
|
dim=c_ast.ID(p[3], self._token_coord(p, 3)),
|
|
dim_quals=[],
|
|
coord=p[1].coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
|
|
|
|
def p_direct_abstract_declarator_5(self, p):
|
|
""" direct_abstract_declarator : LBRACKET TIMES RBRACKET
|
|
"""
|
|
p[0] = c_ast.ArrayDecl(
|
|
type=c_ast.TypeDecl(None, None, None, None),
|
|
dim=c_ast.ID(p[3], self._token_coord(p, 3)),
|
|
dim_quals=[],
|
|
coord=self._token_coord(p, 1))
|
|
|
|
def p_direct_abstract_declarator_6(self, p):
|
|
""" direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
|
|
"""
|
|
func = c_ast.FuncDecl(
|
|
args=p[3],
|
|
type=None,
|
|
coord=p[1].coord)
|
|
|
|
p[0] = self._type_modify_decl(decl=p[1], modifier=func)
|
|
|
|
def p_direct_abstract_declarator_7(self, p):
|
|
""" direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN
|
|
"""
|
|
p[0] = c_ast.FuncDecl(
|
|
args=p[2],
|
|
type=c_ast.TypeDecl(None, None, None, None),
|
|
coord=self._token_coord(p, 1))
|
|
|
|
# declaration is a list, statement isn't. To make it consistent, block_item
|
|
# will always be a list
|
|
#
|
|
def p_block_item(self, p):
|
|
""" block_item : declaration
|
|
| statement
|
|
"""
|
|
p[0] = p[1] if isinstance(p[1], list) else [p[1]]
|
|
|
|
# Since we made block_item a list, this just combines lists
|
|
#
|
|
def p_block_item_list(self, p):
|
|
""" block_item_list : block_item
|
|
| block_item_list block_item
|
|
"""
|
|
# Empty block items (plain ';') produce [None], so ignore them
|
|
p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]
|
|
|
|
def p_compound_statement_1(self, p):
|
|
""" compound_statement : brace_open block_item_list_opt brace_close """
|
|
p[0] = c_ast.Compound(
|
|
block_items=p[2],
|
|
coord=self._token_coord(p, 1))
|
|
|
|
def p_labeled_statement_1(self, p):
|
|
""" labeled_statement : ID COLON pragmacomp_or_statement """
|
|
p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))
|
|
|
|
def p_labeled_statement_2(self, p):
|
|
""" labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
|
|
p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))
|
|
|
|
def p_labeled_statement_3(self, p):
|
|
""" labeled_statement : DEFAULT COLON pragmacomp_or_statement """
|
|
p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))
|
|
|
|
def p_selection_statement_1(self, p):
|
|
""" selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
|
|
p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))
|
|
|
|
def p_selection_statement_2(self, p):
|
|
""" selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
|
|
p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))
|
|
|
|
def p_selection_statement_3(self, p):
|
|
""" selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
|
|
p[0] = fix_switch_cases(
|
|
c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))
|
|
|
|
def p_iteration_statement_1(self, p):
|
|
""" iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
|
|
p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))
|
|
|
|
def p_iteration_statement_2(self, p):
|
|
""" iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
|
|
p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))
|
|
|
|
def p_iteration_statement_3(self, p):
|
|
""" iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
|
|
p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))
|
|
|
|
def p_iteration_statement_4(self, p):
|
|
""" iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
|
|
p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
|
|
p[4], p[6], p[8], self._token_coord(p, 1))
|
|
|
|
def p_jump_statement_1(self, p):
|
|
""" jump_statement : GOTO ID SEMI """
|
|
p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))
|
|
|
|
def p_jump_statement_2(self, p):
|
|
""" jump_statement : BREAK SEMI """
|
|
p[0] = c_ast.Break(self._token_coord(p, 1))
|
|
|
|
def p_jump_statement_3(self, p):
|
|
""" jump_statement : CONTINUE SEMI """
|
|
p[0] = c_ast.Continue(self._token_coord(p, 1))
|
|
|
|
def p_jump_statement_4(self, p):
|
|
""" jump_statement : RETURN expression SEMI
|
|
| RETURN SEMI
|
|
"""
|
|
p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))
|
|
|
|
def p_expression_statement(self, p):
|
|
""" expression_statement : expression_opt SEMI """
|
|
if p[1] is None:
|
|
p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
|
|
else:
|
|
p[0] = p[1]
|
|
|
|
def p_expression(self, p):
|
|
""" expression : assignment_expression
|
|
| expression COMMA assignment_expression
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = p[1]
|
|
else:
|
|
if not isinstance(p[1], c_ast.ExprList):
|
|
p[1] = c_ast.ExprList([p[1]], p[1].coord)
|
|
|
|
p[1].exprs.append(p[3])
|
|
p[0] = p[1]
|
|
|
|
def p_parenthesized_compound_expression(self, p):
|
|
""" assignment_expression : LPAREN compound_statement RPAREN """
|
|
p[0] = p[2]
|
|
|
|
def p_typedef_name(self, p):
|
|
""" typedef_name : TYPEID """
|
|
p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
|
|
|
|
def p_assignment_expression(self, p):
|
|
""" assignment_expression : conditional_expression
|
|
| unary_expression assignment_operator assignment_expression
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = p[1]
|
|
else:
|
|
p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)
|
|
|
|
# K&R2 defines these as many separate rules, to encode
|
|
# precedence and associativity. Why work hard ? I'll just use
|
|
# the built in precedence/associativity specification feature
|
|
# of PLY. (see precedence declaration above)
|
|
#
|
|
def p_assignment_operator(self, p):
|
|
""" assignment_operator : EQUALS
|
|
| XOREQUAL
|
|
| TIMESEQUAL
|
|
| DIVEQUAL
|
|
| MODEQUAL
|
|
| PLUSEQUAL
|
|
| MINUSEQUAL
|
|
| LSHIFTEQUAL
|
|
| RSHIFTEQUAL
|
|
| ANDEQUAL
|
|
| OREQUAL
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_constant_expression(self, p):
|
|
""" constant_expression : conditional_expression """
|
|
p[0] = p[1]
|
|
|
|
def p_conditional_expression(self, p):
|
|
""" conditional_expression : binary_expression
|
|
| binary_expression CONDOP expression COLON conditional_expression
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = p[1]
|
|
else:
|
|
p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
|
|
|
|
def p_binary_expression(self, p):
|
|
""" binary_expression : cast_expression
|
|
| binary_expression TIMES binary_expression
|
|
| binary_expression DIVIDE binary_expression
|
|
| binary_expression MOD binary_expression
|
|
| binary_expression PLUS binary_expression
|
|
| binary_expression MINUS binary_expression
|
|
| binary_expression RSHIFT binary_expression
|
|
| binary_expression LSHIFT binary_expression
|
|
| binary_expression LT binary_expression
|
|
| binary_expression LE binary_expression
|
|
| binary_expression GE binary_expression
|
|
| binary_expression GT binary_expression
|
|
| binary_expression EQ binary_expression
|
|
| binary_expression NE binary_expression
|
|
| binary_expression AND binary_expression
|
|
| binary_expression OR binary_expression
|
|
| binary_expression XOR binary_expression
|
|
| binary_expression LAND binary_expression
|
|
| binary_expression LOR binary_expression
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = p[1]
|
|
else:
|
|
p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)
|
|
|
|
def p_cast_expression_1(self, p):
|
|
""" cast_expression : unary_expression """
|
|
p[0] = p[1]
|
|
|
|
def p_cast_expression_2(self, p):
|
|
""" cast_expression : LPAREN type_name RPAREN cast_expression """
|
|
p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))
|
|
|
|
def p_unary_expression_1(self, p):
|
|
""" unary_expression : postfix_expression """
|
|
p[0] = p[1]
|
|
|
|
def p_unary_expression_2(self, p):
|
|
""" unary_expression : PLUSPLUS unary_expression
|
|
| MINUSMINUS unary_expression
|
|
| unary_operator cast_expression
|
|
"""
|
|
p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)
|
|
|
|
def p_unary_expression_3(self, p):
|
|
""" unary_expression : SIZEOF unary_expression
|
|
| SIZEOF LPAREN type_name RPAREN
|
|
| _ALIGNOF LPAREN type_name RPAREN
|
|
"""
|
|
p[0] = c_ast.UnaryOp(
|
|
p[1],
|
|
p[2] if len(p) == 3 else p[3],
|
|
self._token_coord(p, 1))
|
|
|
|
def p_unary_operator(self, p):
|
|
""" unary_operator : AND
|
|
| TIMES
|
|
| PLUS
|
|
| MINUS
|
|
| NOT
|
|
| LNOT
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_postfix_expression_1(self, p):
|
|
""" postfix_expression : primary_expression """
|
|
p[0] = p[1]
|
|
|
|
def p_postfix_expression_2(self, p):
|
|
""" postfix_expression : postfix_expression LBRACKET expression RBRACKET """
|
|
p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
|
|
|
|
def p_postfix_expression_3(self, p):
|
|
""" postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN
|
|
| postfix_expression LPAREN RPAREN
|
|
"""
|
|
p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)
|
|
|
|
def p_postfix_expression_4(self, p):
|
|
""" postfix_expression : postfix_expression PERIOD ID
|
|
| postfix_expression PERIOD TYPEID
|
|
| postfix_expression ARROW ID
|
|
| postfix_expression ARROW TYPEID
|
|
"""
|
|
field = c_ast.ID(p[3], self._token_coord(p, 3))
|
|
p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)
|
|
|
|
def p_postfix_expression_5(self, p):
|
|
""" postfix_expression : postfix_expression PLUSPLUS
|
|
| postfix_expression MINUSMINUS
|
|
"""
|
|
p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)
|
|
|
|
def p_postfix_expression_6(self, p):
|
|
""" postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close
|
|
| LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
|
|
"""
|
|
p[0] = c_ast.CompoundLiteral(p[2], p[5])
|
|
|
|
def p_primary_expression_1(self, p):
|
|
""" primary_expression : identifier """
|
|
p[0] = p[1]
|
|
|
|
def p_primary_expression_2(self, p):
|
|
""" primary_expression : constant """
|
|
p[0] = p[1]
|
|
|
|
def p_primary_expression_3(self, p):
|
|
""" primary_expression : unified_string_literal
|
|
| unified_wstring_literal
|
|
"""
|
|
p[0] = p[1]
|
|
|
|
def p_primary_expression_4(self, p):
|
|
""" primary_expression : LPAREN expression RPAREN """
|
|
p[0] = p[2]
|
|
|
|
def p_primary_expression_5(self, p):
|
|
""" primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
|
|
"""
|
|
coord = self._token_coord(p, 1)
|
|
p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
|
|
c_ast.ExprList([p[3], p[5]], coord),
|
|
coord)
|
|
|
|
def p_offsetof_member_designator(self, p):
|
|
""" offsetof_member_designator : identifier
|
|
| offsetof_member_designator PERIOD identifier
|
|
| offsetof_member_designator LBRACKET expression RBRACKET
|
|
"""
|
|
if len(p) == 2:
|
|
p[0] = p[1]
|
|
elif len(p) == 4:
|
|
p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
|
|
elif len(p) == 5:
|
|
p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
|
|
else:
|
|
raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))
|
|
|
|
def p_argument_expression_list(self, p):
|
|
""" argument_expression_list : assignment_expression
|
|
| argument_expression_list COMMA assignment_expression
|
|
"""
|
|
if len(p) == 2: # single expr
|
|
p[0] = c_ast.ExprList([p[1]], p[1].coord)
|
|
else:
|
|
p[1].exprs.append(p[3])
|
|
p[0] = p[1]
|
|
|
|
def p_identifier(self, p):
|
|
""" identifier : ID """
|
|
p[0] = c_ast.ID(p[1], self._token_coord(p, 1))
|
|
|
|
def p_constant_1(self, p):
|
|
""" constant : INT_CONST_DEC
|
|
| INT_CONST_OCT
|
|
| INT_CONST_HEX
|
|
| INT_CONST_BIN
|
|
| INT_CONST_CHAR
|
|
"""
|
|
uCount = 0
|
|
lCount = 0
|
|
for x in p[1][-3:]:
|
|
if x in ('l', 'L'):
|
|
lCount += 1
|
|
elif x in ('u', 'U'):
|
|
uCount += 1
|
|
t = ''
|
|
if uCount > 1:
|
|
raise ValueError('Constant cannot have more than one u/U suffix.')
|
|
elif lCount > 2:
|
|
raise ValueError('Constant cannot have more than two l/L suffix.')
|
|
prefix = 'unsigned ' * uCount + 'long ' * lCount
|
|
p[0] = c_ast.Constant(
|
|
prefix + 'int', p[1], self._token_coord(p, 1))
|
|
|
|
def p_constant_2(self, p):
|
|
""" constant : FLOAT_CONST
|
|
| HEX_FLOAT_CONST
|
|
"""
|
|
if 'x' in p[1].lower():
|
|
t = 'float'
|
|
else:
|
|
if p[1][-1] in ('f', 'F'):
|
|
t = 'float'
|
|
elif p[1][-1] in ('l', 'L'):
|
|
t = 'long double'
|
|
else:
|
|
t = 'double'
|
|
|
|
p[0] = c_ast.Constant(
|
|
t, p[1], self._token_coord(p, 1))
|
|
|
|
def p_constant_3(self, p):
|
|
""" constant : CHAR_CONST
|
|
| WCHAR_CONST
|
|
| U8CHAR_CONST
|
|
| U16CHAR_CONST
|
|
| U32CHAR_CONST
|
|
"""
|
|
p[0] = c_ast.Constant(
|
|
'char', p[1], self._token_coord(p, 1))
|
|
|
|
# The "unified" string and wstring literal rules are for supporting
|
|
# concatenation of adjacent string literals.
|
|
# I.e. "hello " "world" is seen by the C compiler as a single string literal
|
|
# with the value "hello world"
|
|
#
|
|
def p_unified_string_literal(self, p):
|
|
""" unified_string_literal : STRING_LITERAL
|
|
| unified_string_literal STRING_LITERAL
|
|
"""
|
|
if len(p) == 2: # single literal
|
|
p[0] = c_ast.Constant(
|
|
'string', p[1], self._token_coord(p, 1))
|
|
else:
|
|
p[1].value = p[1].value[:-1] + p[2][1:]
|
|
p[0] = p[1]
|
|
|
|
def p_unified_wstring_literal(self, p):
|
|
""" unified_wstring_literal : WSTRING_LITERAL
|
|
| U8STRING_LITERAL
|
|
| U16STRING_LITERAL
|
|
| U32STRING_LITERAL
|
|
| unified_wstring_literal WSTRING_LITERAL
|
|
| unified_wstring_literal U8STRING_LITERAL
|
|
| unified_wstring_literal U16STRING_LITERAL
|
|
| unified_wstring_literal U32STRING_LITERAL
|
|
"""
|
|
if len(p) == 2: # single literal
|
|
p[0] = c_ast.Constant(
|
|
'string', p[1], self._token_coord(p, 1))
|
|
else:
|
|
p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
|
|
p[0] = p[1]
|
|
|
|
def p_brace_open(self, p):
|
|
""" brace_open : LBRACE
|
|
"""
|
|
p[0] = p[1]
|
|
p.set_lineno(0, p.lineno(1))
|
|
|
|
def p_brace_close(self, p):
|
|
""" brace_close : RBRACE
|
|
"""
|
|
p[0] = p[1]
|
|
p.set_lineno(0, p.lineno(1))
|
|
|
|
def p_empty(self, p):
|
|
'empty : '
|
|
p[0] = None
|
|
|
|
def p_error(self, p):
|
|
# If error recovery is added here in the future, make sure
|
|
# _get_yacc_lookahead_token still works!
|
|
#
|
|
if p:
|
|
self._parse_error(
|
|
'before: %s' % p.value,
|
|
self._coord(lineno=p.lineno,
|
|
column=self.clex.find_tok_column(p)))
|
|
else:
|
|
self._parse_error('At end of input', self.clex.filename)
|