diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..432a2519
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "submodule/pycparser"]
+	path = submodule/pycparser
+	url = https://github.com/eliben/pycparser
diff --git a/src/C_TO_FSM.py b/src/C_TO_FSM.py
index 481c5421..888d992a 100644
--- a/src/C_TO_FSM.py
+++ b/src/C_TO_FSM.py
@@ -3,6 +3,11 @@
 import os
 
 import C_TO_LOGIC
+
+from utilities import REPO_ABS_DIR
+
+# TODO: Temporarily import from submodule, remove this hack when we create a proper pipelinec setup.py
+sys.path.append(REPO_ABS_DIR() + '/submodule/pycparser')
 from pycparser import c_ast, c_generator
 
 # FSM funcs cant be main functions
diff --git a/src/C_TO_LOGIC.py b/src/C_TO_LOGIC.py
index eda23b09..3a943cee 100755
--- a/src/C_TO_LOGIC.py
+++ b/src/C_TO_LOGIC.py
@@ -24,6 +24,10 @@
     # TODO fall back to clang tools?
     raise Exception("'cpp' C preprocessor is not installed!")
 
+# TODO: Temporarily import from submodule, remove this hack when we create a proper pipelinec setup.py
+sys.path.append(REPO_ABS_DIR() + '/submodule/pycparser')
+from pycparser import c_ast, c_parser
+
 # Global default constants for inferring different VHDL implementations of operators
 MULT_STYLE_INFERRED = "infer"
 MULT_STYLE_FABRIC = "fabric"
diff --git a/src/RAW_VHDL.py b/src/RAW_VHDL.py
index c7b9aa5f..555b7527 100755
--- a/src/RAW_VHDL.py
+++ b/src/RAW_VHDL.py
@@ -5,7 +5,12 @@
 
 import C_TO_LOGIC
 import SW_LIB
 import VHDL
-from pycparser import c_ast, c_parser # bleh for now
+
+from utilities import REPO_ABS_DIR
+
+# TODO: Temporarily import from submodule, remove this hack when we create a proper pipelinec setup.py
+sys.path.append(REPO_ABS_DIR() + '/submodule/pycparser')
+from pycparser import c_ast
 
 # Declare variables used internally to c built in C logic
diff --git a/src/SW_LIB.py b/src/SW_LIB.py
index b91d3e79..f32c9d80 100755
--- a/src/SW_LIB.py
+++ b/src/SW_LIB.py
@@ -8,6 +8,11 @@
 import C_TO_LOGIC
 import SYN
 import VHDL
+
+from utilities import REPO_ABS_DIR
+
+# TODO: Temporarily import from submodule, remove this hack when we create a proper pipelinec setup.py
+sys.path.append(REPO_ABS_DIR() + '/submodule/pycparser')
 from pycparser import c_ast
 
 # Hey lets bootstrap for fun
diff --git a/src/VHDL.py b/src/VHDL.py
index 2ae3d027..410bb378 100755
--- a/src/VHDL.py
+++ b/src/VHDL.py
@@ -14,6 +14,11 @@
 import SYN
 import VERILATOR
 import VIVADO
+
+from utilities import REPO_ABS_DIR
+
+# TODO: Temporarily import from submodule, remove this hack when we create a proper pipelinec setup.py
+sys.path.append(REPO_ABS_DIR() + '/submodule/pycparser')
 from pycparser import c_ast
 
 VHDL_FILE_EXT = ".vhd"
diff --git a/src/pycparser/__init__.py b/src/pycparser/__init__.py
deleted file mode 100755
index b595d99a..00000000
--- a/src/pycparser/__init__.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -----------------------------------------------------------------
-# pycparser: __init__.py
-#
-# This package file exports some convenience functions for
-# interacting with pycparser
-#
-# Eli Bendersky [https://eli.thegreenplace.net/]
-# License: BSD
-# -----------------------------------------------------------------
-__all__ = ["c_lexer", "c_parser", "c_ast"]
-__version__ = "2.18"
-
-import io
-from subprocess import check_output
-
-from .c_parser import CParser
-
-
-def preprocess_file(filename, cpp_path="cpp", cpp_args=""):
-    """Preprocess a file using cpp.
-
-        filename:
-            Name of the file you want to preprocess.
- - cpp_path: - cpp_args: - Refer to the documentation of parse_file for the meaning of these - arguments. - - When successful, returns the preprocessed file's contents. - Errors from cpp will be printed out. - """ - path_list = [cpp_path] - if isinstance(cpp_args, list): - path_list += cpp_args - elif cpp_args != "": - path_list += [cpp_args] - path_list += [filename] - - try: - # Note the use of universal_newlines to treat all newlines - # as \n for Python's purpose - text = check_output(path_list, universal_newlines=True) - except OSError as e: - raise RuntimeError( - "Unable to invoke 'cpp'. " - + "Make sure its path was passed correctly\n" - + ("Original error: %s" % e) - ) - - return text - - -def parse_file(filename, use_cpp=False, cpp_path="cpp", cpp_args="", parser=None): - """Parse a C file using pycparser. - - filename: - Name of the file you want to parse. - - use_cpp: - Set to True if you want to execute the C pre-processor - on the file prior to parsing it. - - cpp_path: - If use_cpp is True, this is the path to 'cpp' on your - system. If no path is provided, it attempts to just - execute 'cpp', so it must be in your PATH. - - cpp_args: - If use_cpp is True, set this to the command line arguments strings - to cpp. Be careful with quotes - it's best to pass a raw string - (r'') here. For example: - r'-I../utils/fake_libc_include' - If several arguments are required, pass a list of strings. - - parser: - Optional parser object to be used instead of the default CParser - - When successful, an AST is returned. ParseError can be - thrown if the file doesn't parse successfully. - - Errors from cpp will be printed out. - """ - if use_cpp: - text = preprocess_file(filename, cpp_path, cpp_args) - else: - with io.open(filename) as f: - text = f.read() - - if parser is None: - parser = CParser() - return parser.parse(text, filename) diff --git a/src/pycparser/__init__.pyc b/src/pycparser/__init__.pyc deleted file mode 100644 index 10adb2dc..00000000 Binary files a/src/pycparser/__init__.pyc and /dev/null differ diff --git a/src/pycparser/_ast_gen.py b/src/pycparser/_ast_gen.py deleted file mode 100755 index 3e763e8e..00000000 --- a/src/pycparser/_ast_gen.py +++ /dev/null @@ -1,335 +0,0 @@ -# ----------------------------------------------------------------- -# _ast_gen.py -# -# Generates the AST Node classes from a specification given in -# a configuration file -# -# The design of this module was inspired by astgen.py from the -# Python 2.5 code-base. -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ----------------------------------------------------------------- -import pprint -from string import Template - - -class ASTCodeGenerator(object): - def __init__(self, cfg_filename="_c_ast.cfg"): - """Initialize the code generator from a configuration - file. - """ - self.cfg_filename = cfg_filename - self.node_cfg = [ - NodeCfg(name, contents) - for (name, contents) in self.parse_cfgfile(cfg_filename) - ] - - def generate(self, file=None): - """Generates the code into file, an open file buffer.""" - src = Template(_PROLOGUE_COMMENT).substitute(cfg_filename=self.cfg_filename) - - src += _PROLOGUE_CODE - for node_cfg in self.node_cfg: - src += node_cfg.generate_source() + "\n\n" - - file.write(src) - - def parse_cfgfile(self, filename): - """Parse the configuration file and yield pairs of - (name, contents) for each node. 
- """ - with open(filename, "r") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - colon_i = line.find(":") - lbracket_i = line.find("[") - rbracket_i = line.find("]") - if colon_i < 1 or lbracket_i <= colon_i or rbracket_i <= lbracket_i: - raise RuntimeError("Invalid line in %s:\n%s\n" % (filename, line)) - - name = line[:colon_i] - val = line[lbracket_i + 1 : rbracket_i] - vallist = [v.strip() for v in val.split(",")] if val else [] - yield name, vallist - - -class NodeCfg(object): - """Node configuration. - - name: node name - contents: a list of contents - attributes and child nodes - See comment at the top of the configuration file for details. - """ - - def __init__(self, name, contents): - self.name = name - self.all_entries = [] - self.attr = [] - self.child = [] - self.seq_child = [] - - for entry in contents: - clean_entry = entry.rstrip("*") - self.all_entries.append(clean_entry) - - if entry.endswith("**"): - self.seq_child.append(clean_entry) - elif entry.endswith("*"): - self.child.append(clean_entry) - else: - self.attr.append(entry) - - def generate_source(self): - src = self._gen_init() - src += "\n" + self._gen_children() - src += "\n" + self._gen_iter() - - src += "\n" + self._gen_attr_names() - return src - - def _gen_init(self): - src = "class %s(Node):\n" % self.name - - if self.all_entries: - args = ", ".join(self.all_entries) - slots = ", ".join("'{0}'".format(e) for e in self.all_entries) - slots += ", 'coord', '__weakref__'" - arglist = "(self, %s, coord=None)" % args - else: - slots = "'coord', '__weakref__'" - arglist = "(self, coord=None)" - - src += " __slots__ = (%s)\n" % slots - src += " def __init__%s:\n" % arglist - - for name in self.all_entries + ["coord"]: - src += " self.%s = %s\n" % (name, name) - - return src - - def _gen_children(self): - src = " def children(self):\n" - - if self.all_entries: - src += " nodelist = []\n" - - for child in self.child: - src += ( - " if self.%(child)s is not None:" - + ' nodelist.append(("%(child)s", self.%(child)s))\n' - ) % (dict(child=child)) - - for seq_child in self.seq_child: - src += ( - " for i, child in enumerate(self.%(child)s or []):\n" - ' nodelist.append(("%(child)s[%%d]" %% i, child))\n' - ) % (dict(child=seq_child)) - - src += " return tuple(nodelist)\n" - else: - src += " return ()\n" - - return src - - def _gen_iter(self): - src = " def __iter__(self):\n" - - if self.all_entries: - for child in self.child: - src += ( - " if self.%(child)s is not None:\n" - + " yield self.%(child)s\n" - ) % (dict(child=child)) - - for seq_child in self.seq_child: - src += ( - " for child in (self.%(child)s or []):\n" - " yield child\n" - ) % (dict(child=seq_child)) - - if not (self.child or self.seq_child): - # Empty generator - src += " return\n" + " yield\n" - else: - # Empty generator - src += " return\n" + " yield\n" - - return src - - def _gen_attr_names(self): - src = " attr_names = (" + "".join("%r, " % nm for nm in self.attr) + ")" - return src - - -_PROLOGUE_COMMENT = r"""#----------------------------------------------------------------- -# ** ATTENTION ** -# This code was automatically generated from the file: -# $cfg_filename -# -# Do not modify it directly. Modify the configuration file and -# run the generator again. -# ** ** *** ** ** -# -# pycparser: c_ast.py -# -# AST Node classes. 
-# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -#----------------------------------------------------------------- - -""" - -_PROLOGUE_CODE = r''' -import sys - -def _repr(obj): - """ - Get the representation of an object, with dedicated pprint-like format for lists. - """ - if isinstance(obj, list): - return '[' + (',\n '.join((_repr(e).replace('\n', '\n ') for e in obj))) + '\n]' - else: - return repr(obj) - -class Node(object): - __slots__ = () - """ Abstract base class for AST nodes. - """ - def __repr__(self): - """ Generates a python representation of the current node - """ - result = self.__class__.__name__ + '(' - - indent = '' - separator = '' - for name in self.__slots__[:-2]: - result += separator - result += indent - result += name + '=' + (_repr(getattr(self, name)).replace('\n', '\n ' + (' ' * (len(name) + len(self.__class__.__name__))))) - - separator = ',' - indent = '\n ' + (' ' * len(self.__class__.__name__)) - - result += indent + ')' - - return result - - def children(self): - """ A sequence of all children that are Nodes - """ - pass - - def show(self, buf=sys.stdout, offset=0, attrnames=False, nodenames=False, showcoord=False, _my_node_name=None): - """ Pretty print the Node and all its attributes and - children (recursively) to a buffer. - - buf: - Open IO buffer into which the Node is printed. - - offset: - Initial offset (amount of leading spaces) - - attrnames: - True if you want to see the attribute names in - name=value pairs. False to only see the values. - - nodenames: - True if you want to see the actual node names - within their parents. - - showcoord: - Do you want the coordinates of each Node to be - displayed. - """ - lead = ' ' * offset - if nodenames and _my_node_name is not None: - buf.write(lead + self.__class__.__name__+ ' <' + _my_node_name + '>: ') - else: - buf.write(lead + self.__class__.__name__+ ': ') - - if self.attr_names: - if attrnames: - nvlist = [(n, getattr(self,n)) for n in self.attr_names] - attrstr = ', '.join('%s=%s' % nv for nv in nvlist) - else: - vlist = [getattr(self, n) for n in self.attr_names] - attrstr = ', '.join('%s' % v for v in vlist) - buf.write(attrstr) - - if showcoord: - buf.write(' (at %s)' % self.coord) - buf.write('\n') - - for (child_name, child) in self.children(): - child.show( - buf, - offset=offset + 2, - attrnames=attrnames, - nodenames=nodenames, - showcoord=showcoord, - _my_node_name=child_name) - - -class NodeVisitor(object): - """ A base NodeVisitor class for visiting c_ast nodes. - Subclass it and define your own visit_XXX methods, where - XXX is the class name you want to visit with these - methods. - - For example: - - class ConstantVisitor(NodeVisitor): - def __init__(self): - self.values = [] - - def visit_Constant(self, node): - self.values.append(node.value) - - Creates a list of values of all the constant nodes - encountered below the given node. To use it: - - cv = ConstantVisitor() - cv.visit(node) - - Notes: - - * generic_visit() will be called for AST nodes for which - no visit_XXX method was defined. - * The children of nodes for which a visit_XXX was - defined will not be visited - if you need this, call - generic_visit() on the node. - You can use: - NodeVisitor.generic_visit(self, node) - * Modeled after Python's own AST visiting facilities - (the ast module of Python 3.0) - """ - - _method_cache = None - - def visit(self, node): - """ Visit a node. 
- """ - - if self._method_cache is None: - self._method_cache = {} - - visitor = self._method_cache.get(node.__class__.__name__, None) - if visitor is None: - method = 'visit_' + node.__class__.__name__ - visitor = getattr(self, method, self.generic_visit) - self._method_cache[node.__class__.__name__] = visitor - - return visitor(node) - - def generic_visit(self, node): - """ Called if no explicit visitor function exists for a - node. Implements preorder visiting of the node. - """ - for c in node: - self.visit(c) - -''' diff --git a/src/pycparser/_build_tables.py b/src/pycparser/_build_tables.py deleted file mode 100755 index 84a7259f..00000000 --- a/src/pycparser/_build_tables.py +++ /dev/null @@ -1,33 +0,0 @@ -# ----------------------------------------------------------------- -# pycparser: _build_tables.py -# -# A dummy for generating the lexing/parsing tables and and -# compiling them into .pyc for faster execution in optimized mode. -# Also generates AST code from the configuration file. -# Should be called from the pycparser directory. -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ----------------------------------------------------------------- - -# Generate c_ast.py -from _ast_gen import ASTCodeGenerator - -ast_gen = ASTCodeGenerator("_c_ast.cfg") -ast_gen.generate(open("c_ast.py", "w")) - -import sys - -sys.path[0:0] = [".", ".."] -from pycparser import c_parser - -# Generates the tables -# -c_parser.CParser(lex_optimize=True, yacc_debug=False, yacc_optimize=True) - -import c_ast - -# Load to compile into .pyc -# -import lextab -import yacctab diff --git a/src/pycparser/_c_ast.cfg b/src/pycparser/_c_ast.cfg deleted file mode 100755 index b93d50bb..00000000 --- a/src/pycparser/_c_ast.cfg +++ /dev/null @@ -1,191 +0,0 @@ -#----------------------------------------------------------------- -# pycparser: _c_ast.cfg -# -# Defines the AST Node classes used in pycparser. -# -# Each entry is a Node sub-class name, listing the attributes -# and child nodes of the class: -# * - a child node -# ** - a sequence of child nodes -# - an attribute -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -#----------------------------------------------------------------- - -# ArrayDecl is a nested declaration of an array with the given type. -# dim: the dimension (for example, constant 42) -# dim_quals: list of dimension qualifiers, to support C99's allowing 'const' -# and 'static' within the array dimension in function declarations. -ArrayDecl: [type*, dim*, dim_quals] - -ArrayRef: [name*, subscript*] - -# op: =, +=, /= etc. -# -Assignment: [op, lvalue*, rvalue*] - -BinaryOp: [op, left*, right*] - -Break: [] - -Case: [expr*, stmts**] - -Cast: [to_type*, expr*] - -# Compound statement in C99 is a list of block items (declarations or -# statements). -# -Compound: [block_items**] - -# Compound literal (anonymous aggregate) for C99. -# (type-name) {initializer_list} -# type: the typename -# init: InitList for the initializer list -# -CompoundLiteral: [type*, init*] - -# type: int, char, float, etc. see CLexer for constant token types -# -Constant: [type, value] - -Continue: [] - -# name: the variable being declared -# quals: list of qualifiers (const, volatile) -# funcspec: list function specifiers (i.e. inline in C99) -# storage: list of storage specifiers (extern, register, etc.) 
-# type: declaration type (probably nested with all the modifiers) -# init: initialization value, or None -# bitsize: bit field size, or None -# -Decl: [name, quals, storage, funcspec, type*, init*, bitsize*] - -DeclList: [decls**] - -Default: [stmts**] - -DoWhile: [cond*, stmt*] - -# Represents the ellipsis (...) parameter in a function -# declaration -# -EllipsisParam: [] - -# An empty statement (a semicolon ';' on its own) -# -EmptyStatement: [] - -# Enumeration type specifier -# name: an optional ID -# values: an EnumeratorList -# -Enum: [name, values*] - -# A name/value pair for enumeration values -# -Enumerator: [name, value*] - -# A list of enumerators -# -EnumeratorList: [enumerators**] - -# A list of expressions separated by the comma operator. -# -ExprList: [exprs**] - -# This is the top of the AST, representing a single C file (a -# translation unit in K&R jargon). It contains a list of -# "external-declaration"s, which is either declarations (Decl), -# Typedef or function definitions (FuncDef). -# -FileAST: [ext**] - -# for (init; cond; next) stmt -# -For: [init*, cond*, next*, stmt*] - -# name: Id -# args: ExprList -# -FuncCall: [name*, args*] - -# type (args) -# -FuncDecl: [args*, type*] - -# Function definition: a declarator for the function name and -# a body, which is a compound statement. -# There's an optional list of parameter declarations for old -# K&R-style definitions -# -FuncDef: [decl*, param_decls**, body*] - -Goto: [name] - -ID: [name] - -# Holder for types that are a simple identifier (e.g. the built -# ins void, char etc. and typedef-defined types) -# -IdentifierType: [names] - -If: [cond*, iftrue*, iffalse*] - -# An initialization list used for compound literals. -# -InitList: [exprs**] - -Label: [name, stmt*] - -# A named initializer for C99. -# The name of a NamedInitializer is a sequence of Nodes, because -# names can be hierarchical and contain constant expressions. -# -NamedInitializer: [name**, expr*] - -# a list of comma separated function parameter declarations -# -ParamList: [params**] - -PtrDecl: [quals, type*] - -Return: [expr*] - -# name: struct tag name -# decls: declaration of members -# -Struct: [name, decls**] - -# type: . or -> -# name.field or name->field -# -StructRef: [name*, type, field*] - -Switch: [cond*, stmt*] - -# cond ? iftrue : iffalse -# -TernaryOp: [cond*, iftrue*, iffalse*] - -# A base type declaration -# -TypeDecl: [declname, quals, type*] - -# A typedef declaration. -# Very similar to Decl, but without some attributes -# -Typedef: [name, quals, storage, type*] - -Typename: [name, quals, type*] - -UnaryOp: [op, expr*] - -# name: union tag name -# decls: declaration of members -# -Union: [name, decls**] - -While: [cond*, stmt*] - -Pragma: [string] diff --git a/src/pycparser/ast_transforms.py b/src/pycparser/ast_transforms.py deleted file mode 100755 index 066edf3b..00000000 --- a/src/pycparser/ast_transforms.py +++ /dev/null @@ -1,104 +0,0 @@ -# ------------------------------------------------------------------------------ -# pycparser: ast_transforms.py -# -# Some utilities used by the parser to create a friendlier AST. -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ------------------------------------------------------------------------------ - -from . import c_ast - - -def fix_switch_cases(switch_node): - """The 'case' statements in a 'switch' come out of parsing with one - child node, so subsequent statements are just tucked to the parent - Compound. 
Additionally, consecutive (fall-through) case statements - come out messy. This is a peculiarity of the C grammar. The following: - - switch (myvar) { - case 10: - k = 10; - p = k + 1; - return 10; - case 20: - case 30: - return 20; - default: - break; - } - - Creates this tree (pseudo-dump): - - Switch - ID: myvar - Compound: - Case 10: - k = 10 - p = k + 1 - return 10 - Case 20: - Case 30: - return 20 - Default: - break - - The goal of this transform is to fix this mess, turning it into the - following: - - Switch - ID: myvar - Compound: - Case 10: - k = 10 - p = k + 1 - return 10 - Case 20: - Case 30: - return 20 - Default: - break - - A fixed AST node is returned. The argument may be modified. - """ - assert isinstance(switch_node, c_ast.Switch) - if not isinstance(switch_node.stmt, c_ast.Compound): - return switch_node - - # The new Compound child for the Switch, which will collect children in the - # correct order - new_compound = c_ast.Compound([], switch_node.stmt.coord) - - # The last Case/Default node - last_case = None - - # Goes over the children of the Compound below the Switch, adding them - # either directly below new_compound or below the last Case as appropriate - for child in switch_node.stmt.block_items: - if isinstance(child, (c_ast.Case, c_ast.Default)): - # If it's a Case/Default: - # 1. Add it to the Compound and mark as "last case" - # 2. If its immediate child is also a Case or Default, promote it - # to a sibling. - new_compound.block_items.append(child) - _extract_nested_case(child, new_compound.block_items) - last_case = new_compound.block_items[-1] - else: - # Other statements are added as children to the last case, if it - # exists. - if last_case is None: - new_compound.block_items.append(child) - else: - last_case.stmts.append(child) - - switch_node.stmt = new_compound - return switch_node - - -def _extract_nested_case(case_node, stmts_list): - """Recursively extract consecutive Case statements that are made nested - by the parser and add them to the stmts_list. - """ - if isinstance(case_node.stmts[0], (c_ast.Case, c_ast.Default)): - stmts_list.append(case_node.stmts.pop()) - _extract_nested_case(stmts_list[-1], stmts_list) diff --git a/src/pycparser/ast_transforms.pyc b/src/pycparser/ast_transforms.pyc deleted file mode 100644 index ad24a56f..00000000 Binary files a/src/pycparser/ast_transforms.pyc and /dev/null differ diff --git a/src/pycparser/c_ast.py b/src/pycparser/c_ast.py deleted file mode 100755 index 97ce9825..00000000 --- a/src/pycparser/c_ast.py +++ /dev/null @@ -1,1273 +0,0 @@ -# ----------------------------------------------------------------- -# ** ATTENTION ** -# This code was automatically generated from the file: -# _c_ast.cfg -# -# Do not modify it directly. Modify the configuration file and -# run the generator again. -# ** ** *** ** ** -# -# pycparser: c_ast.py -# -# AST Node classes. -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ----------------------------------------------------------------- - - -import sys - - -def _repr(obj): - """ - Get the representation of an object, with dedicated pprint-like format for lists. - """ - if isinstance(obj, list): - return "[" + (",\n ".join((_repr(e).replace("\n", "\n ") for e in obj))) + "\n]" - else: - return repr(obj) - - -class Node(object): - __slots__ = () - """ Abstract base class for AST nodes. 
- """ - - def __repr__(self): - """Generates a python representation of the current node""" - result = self.__class__.__name__ + "(" - - indent = "" - separator = "" - for name in self.__slots__[:-2]: - result += separator - result += indent - result += ( - name - + "=" - + ( - _repr(getattr(self, name)).replace( - "\n", - "\n " + (" " * (len(name) + len(self.__class__.__name__))), - ) - ) - ) - - separator = "," - indent = "\n " + (" " * len(self.__class__.__name__)) - - result += indent + ")" - - return result - - def children(self): - """A sequence of all children that are Nodes""" - pass - - def show( - self, - buf=sys.stdout, - offset=0, - attrnames=False, - nodenames=False, - showcoord=False, - _my_node_name=None, - ): - """Pretty print the Node and all its attributes and - children (recursively) to a buffer. - - buf: - Open IO buffer into which the Node is printed. - - offset: - Initial offset (amount of leading spaces) - - attrnames: - True if you want to see the attribute names in - name=value pairs. False to only see the values. - - nodenames: - True if you want to see the actual node names - within their parents. - - showcoord: - Do you want the coordinates of each Node to be - displayed. - """ - lead = " " * offset - if nodenames and _my_node_name is not None: - buf.write(lead + self.__class__.__name__ + " <" + _my_node_name + ">: ") - else: - buf.write(lead + self.__class__.__name__ + ": ") - - if self.attr_names: - if attrnames: - nvlist = [(n, getattr(self, n)) for n in self.attr_names] - attrstr = ", ".join("%s=%s" % nv for nv in nvlist) - else: - vlist = [getattr(self, n) for n in self.attr_names] - attrstr = ", ".join("%s" % v for v in vlist) - buf.write(attrstr) - - if showcoord: - buf.write(" (at %s)" % self.coord) - buf.write("\n") - - for (child_name, child) in self.children(): - child.show( - buf, - offset=offset + 2, - attrnames=attrnames, - nodenames=nodenames, - showcoord=showcoord, - _my_node_name=child_name, - ) - - -class NodeVisitor(object): - """A base NodeVisitor class for visiting c_ast nodes. - Subclass it and define your own visit_XXX methods, where - XXX is the class name you want to visit with these - methods. - - For example: - - class ConstantVisitor(NodeVisitor): - def __init__(self): - self.values = [] - - def visit_Constant(self, node): - self.values.append(node.value) - - Creates a list of values of all the constant nodes - encountered below the given node. To use it: - - cv = ConstantVisitor() - cv.visit(node) - - Notes: - - * generic_visit() will be called for AST nodes for which - no visit_XXX method was defined. - * The children of nodes for which a visit_XXX was - defined will not be visited - if you need this, call - generic_visit() on the node. - You can use: - NodeVisitor.generic_visit(self, node) - * Modeled after Python's own AST visiting facilities - (the ast module of Python 3.0) - """ - - _method_cache = None - - def visit(self, node): - """Visit a node.""" - - if self._method_cache is None: - self._method_cache = {} - - visitor = self._method_cache.get(node.__class__.__name__, None) - if visitor is None: - method = "visit_" + node.__class__.__name__ - visitor = getattr(self, method, self.generic_visit) - self._method_cache[node.__class__.__name__] = visitor - - return visitor(node) - - def generic_visit(self, node): - """Called if no explicit visitor function exists for a - node. Implements preorder visiting of the node. 
- """ - for c in node: - self.visit(c) - - -class ArrayDecl(Node): - __slots__ = ("type", "dim", "dim_quals", "coord", "__weakref__") - - def __init__(self, type, dim, dim_quals, coord=None): - self.type = type - self.dim = dim - self.dim_quals = dim_quals - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - if self.dim is not None: - nodelist.append(("dim", self.dim)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - if self.dim is not None: - yield self.dim - - attr_names = ("dim_quals",) - - -class ArrayRef(Node): - __slots__ = ("name", "subscript", "coord", "__weakref__") - - def __init__(self, name, subscript, coord=None): - self.name = name - self.subscript = subscript - self.coord = coord - - def children(self): - nodelist = [] - if self.name is not None: - nodelist.append(("name", self.name)) - if self.subscript is not None: - nodelist.append(("subscript", self.subscript)) - return tuple(nodelist) - - def __iter__(self): - if self.name is not None: - yield self.name - if self.subscript is not None: - yield self.subscript - - attr_names = () - - -class Assignment(Node): - __slots__ = ("op", "lvalue", "rvalue", "coord", "__weakref__") - - def __init__(self, op, lvalue, rvalue, coord=None): - self.op = op - self.lvalue = lvalue - self.rvalue = rvalue - self.coord = coord - - def children(self): - nodelist = [] - if self.lvalue is not None: - nodelist.append(("lvalue", self.lvalue)) - if self.rvalue is not None: - nodelist.append(("rvalue", self.rvalue)) - return tuple(nodelist) - - def __iter__(self): - if self.lvalue is not None: - yield self.lvalue - if self.rvalue is not None: - yield self.rvalue - - attr_names = ("op",) - - -class BinaryOp(Node): - __slots__ = ("op", "left", "right", "coord", "__weakref__") - - def __init__(self, op, left, right, coord=None): - self.op = op - self.left = left - self.right = right - self.coord = coord - - def children(self): - nodelist = [] - if self.left is not None: - nodelist.append(("left", self.left)) - if self.right is not None: - nodelist.append(("right", self.right)) - return tuple(nodelist) - - def __iter__(self): - if self.left is not None: - yield self.left - if self.right is not None: - yield self.right - - attr_names = ("op",) - - -class Break(Node): - __slots__ = ("coord", "__weakref__") - - def __init__(self, coord=None): - self.coord = coord - - def children(self): - return () - - def __iter__(self): - return - yield - - attr_names = () - - -class Case(Node): - __slots__ = ("expr", "stmts", "coord", "__weakref__") - - def __init__(self, expr, stmts, coord=None): - self.expr = expr - self.stmts = stmts - self.coord = coord - - def children(self): - nodelist = [] - if self.expr is not None: - nodelist.append(("expr", self.expr)) - for i, child in enumerate(self.stmts or []): - nodelist.append(("stmts[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - if self.expr is not None: - yield self.expr - for child in self.stmts or []: - yield child - - attr_names = () - - -class Cast(Node): - __slots__ = ("to_type", "expr", "coord", "__weakref__") - - def __init__(self, to_type, expr, coord=None): - self.to_type = to_type - self.expr = expr - self.coord = coord - - def children(self): - nodelist = [] - if self.to_type is not None: - nodelist.append(("to_type", self.to_type)) - if self.expr is not None: - nodelist.append(("expr", self.expr)) - return tuple(nodelist) - - def __iter__(self): - if 
self.to_type is not None: - yield self.to_type - if self.expr is not None: - yield self.expr - - attr_names = () - - -class Compound(Node): - __slots__ = ("block_items", "coord", "__weakref__") - - def __init__(self, block_items, coord=None): - self.block_items = block_items - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.block_items or []): - nodelist.append(("block_items[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.block_items or []: - yield child - - attr_names = () - - -class CompoundLiteral(Node): - __slots__ = ("type", "init", "coord", "__weakref__") - - def __init__(self, type, init, coord=None): - self.type = type - self.init = init - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - if self.init is not None: - nodelist.append(("init", self.init)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - if self.init is not None: - yield self.init - - attr_names = () - - -class Constant(Node): - __slots__ = ("type", "value", "coord", "__weakref__") - - def __init__(self, type, value, coord=None): - self.type = type - self.value = value - self.coord = coord - - def children(self): - nodelist = [] - return tuple(nodelist) - - def __iter__(self): - return - yield - - attr_names = ( - "type", - "value", - ) - - -class Continue(Node): - __slots__ = ("coord", "__weakref__") - - def __init__(self, coord=None): - self.coord = coord - - def children(self): - return () - - def __iter__(self): - return - yield - - attr_names = () - - -class Decl(Node): - __slots__ = ( - "name", - "quals", - "storage", - "funcspec", - "type", - "init", - "bitsize", - "coord", - "__weakref__", - ) - - def __init__(self, name, quals, storage, funcspec, type, init, bitsize, coord=None): - self.name = name - self.quals = quals - self.storage = storage - self.funcspec = funcspec - self.type = type - self.init = init - self.bitsize = bitsize - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - if self.init is not None: - nodelist.append(("init", self.init)) - if self.bitsize is not None: - nodelist.append(("bitsize", self.bitsize)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - if self.init is not None: - yield self.init - if self.bitsize is not None: - yield self.bitsize - - attr_names = ( - "name", - "quals", - "storage", - "funcspec", - ) - - -class DeclList(Node): - __slots__ = ("decls", "coord", "__weakref__") - - def __init__(self, decls, coord=None): - self.decls = decls - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.decls or []): - nodelist.append(("decls[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.decls or []: - yield child - - attr_names = () - - -class Default(Node): - __slots__ = ("stmts", "coord", "__weakref__") - - def __init__(self, stmts, coord=None): - self.stmts = stmts - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.stmts or []): - nodelist.append(("stmts[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.stmts or []: - yield child - - attr_names = () - - -class DoWhile(Node): - __slots__ = ("cond", "stmt", "coord", "__weakref__") - - def __init__(self, cond, stmt, coord=None): - self.cond = cond - 
self.stmt = stmt - self.coord = coord - - def children(self): - nodelist = [] - if self.cond is not None: - nodelist.append(("cond", self.cond)) - if self.stmt is not None: - nodelist.append(("stmt", self.stmt)) - return tuple(nodelist) - - def __iter__(self): - if self.cond is not None: - yield self.cond - if self.stmt is not None: - yield self.stmt - - attr_names = () - - -class EllipsisParam(Node): - __slots__ = ("coord", "__weakref__") - - def __init__(self, coord=None): - self.coord = coord - - def children(self): - return () - - def __iter__(self): - return - yield - - attr_names = () - - -class EmptyStatement(Node): - __slots__ = ("coord", "__weakref__") - - def __init__(self, coord=None): - self.coord = coord - - def children(self): - return () - - def __iter__(self): - return - yield - - attr_names = () - - -class Enum(Node): - __slots__ = ("name", "values", "coord", "__weakref__") - - def __init__(self, name, values, coord=None): - self.name = name - self.values = values - self.coord = coord - - def children(self): - nodelist = [] - if self.values is not None: - nodelist.append(("values", self.values)) - return tuple(nodelist) - - def __iter__(self): - if self.values is not None: - yield self.values - - attr_names = ("name",) - - -class Enumerator(Node): - __slots__ = ("name", "value", "coord", "__weakref__") - - def __init__(self, name, value, coord=None): - self.name = name - self.value = value - self.coord = coord - - def children(self): - nodelist = [] - if self.value is not None: - nodelist.append(("value", self.value)) - return tuple(nodelist) - - def __iter__(self): - if self.value is not None: - yield self.value - - attr_names = ("name",) - - -class EnumeratorList(Node): - __slots__ = ("enumerators", "coord", "__weakref__") - - def __init__(self, enumerators, coord=None): - self.enumerators = enumerators - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.enumerators or []): - nodelist.append(("enumerators[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.enumerators or []: - yield child - - attr_names = () - - -class ExprList(Node): - __slots__ = ("exprs", "coord", "__weakref__") - - def __init__(self, exprs, coord=None): - self.exprs = exprs - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.exprs or []): - nodelist.append(("exprs[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.exprs or []: - yield child - - attr_names = () - - -class FileAST(Node): - __slots__ = ("ext", "coord", "__weakref__") - - def __init__(self, ext, coord=None): - self.ext = ext - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.ext or []): - nodelist.append(("ext[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.ext or []: - yield child - - attr_names = () - - -class For(Node): - __slots__ = ("init", "cond", "next", "stmt", "coord", "__weakref__") - - def __init__(self, init, cond, next, stmt, coord=None): - self.init = init - self.cond = cond - self.next = next - self.stmt = stmt - self.coord = coord - - def children(self): - nodelist = [] - if self.init is not None: - nodelist.append(("init", self.init)) - if self.cond is not None: - nodelist.append(("cond", self.cond)) - if self.next is not None: - nodelist.append(("next", self.next)) - if self.stmt is not None: - nodelist.append(("stmt", self.stmt)) - return tuple(nodelist) - - def __iter__(self): - if 
self.init is not None: - yield self.init - if self.cond is not None: - yield self.cond - if self.next is not None: - yield self.next - if self.stmt is not None: - yield self.stmt - - attr_names = () - - -class FuncCall(Node): - __slots__ = ("name", "args", "coord", "__weakref__") - - def __init__(self, name, args, coord=None): - self.name = name - self.args = args - self.coord = coord - - def children(self): - nodelist = [] - if self.name is not None: - nodelist.append(("name", self.name)) - if self.args is not None: - nodelist.append(("args", self.args)) - return tuple(nodelist) - - def __iter__(self): - if self.name is not None: - yield self.name - if self.args is not None: - yield self.args - - attr_names = () - - -class FuncDecl(Node): - __slots__ = ("args", "type", "coord", "__weakref__") - - def __init__(self, args, type, coord=None): - self.args = args - self.type = type - self.coord = coord - - def children(self): - nodelist = [] - if self.args is not None: - nodelist.append(("args", self.args)) - if self.type is not None: - nodelist.append(("type", self.type)) - return tuple(nodelist) - - def __iter__(self): - if self.args is not None: - yield self.args - if self.type is not None: - yield self.type - - attr_names = () - - -class FuncDef(Node): - __slots__ = ("decl", "param_decls", "body", "coord", "__weakref__") - - def __init__(self, decl, param_decls, body, coord=None): - self.decl = decl - self.param_decls = param_decls - self.body = body - self.coord = coord - - def children(self): - nodelist = [] - if self.decl is not None: - nodelist.append(("decl", self.decl)) - if self.body is not None: - nodelist.append(("body", self.body)) - for i, child in enumerate(self.param_decls or []): - nodelist.append(("param_decls[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - if self.decl is not None: - yield self.decl - if self.body is not None: - yield self.body - for child in self.param_decls or []: - yield child - - attr_names = () - - -class Goto(Node): - __slots__ = ("name", "coord", "__weakref__") - - def __init__(self, name, coord=None): - self.name = name - self.coord = coord - - def children(self): - nodelist = [] - return tuple(nodelist) - - def __iter__(self): - return - yield - - attr_names = ("name",) - - -class ID(Node): - __slots__ = ("name", "coord", "__weakref__") - - def __init__(self, name, coord=None): - self.name = name - self.coord = coord - - def children(self): - nodelist = [] - return tuple(nodelist) - - def __iter__(self): - return - yield - - attr_names = ("name",) - - -class IdentifierType(Node): - __slots__ = ("names", "coord", "__weakref__") - - def __init__(self, names, coord=None): - self.names = names - self.coord = coord - - def children(self): - nodelist = [] - return tuple(nodelist) - - def __iter__(self): - return - yield - - attr_names = ("names",) - - -class If(Node): - __slots__ = ("cond", "iftrue", "iffalse", "coord", "__weakref__") - - def __init__(self, cond, iftrue, iffalse, coord=None): - self.cond = cond - self.iftrue = iftrue - self.iffalse = iffalse - self.coord = coord - - def children(self): - nodelist = [] - if self.cond is not None: - nodelist.append(("cond", self.cond)) - if self.iftrue is not None: - nodelist.append(("iftrue", self.iftrue)) - if self.iffalse is not None: - nodelist.append(("iffalse", self.iffalse)) - return tuple(nodelist) - - def __iter__(self): - if self.cond is not None: - yield self.cond - if self.iftrue is not None: - yield self.iftrue - if self.iffalse is not None: - yield self.iffalse - - 
attr_names = () - - -class InitList(Node): - __slots__ = ("exprs", "coord", "__weakref__") - - def __init__(self, exprs, coord=None): - self.exprs = exprs - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.exprs or []): - nodelist.append(("exprs[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.exprs or []: - yield child - - attr_names = () - - -class Label(Node): - __slots__ = ("name", "stmt", "coord", "__weakref__") - - def __init__(self, name, stmt, coord=None): - self.name = name - self.stmt = stmt - self.coord = coord - - def children(self): - nodelist = [] - if self.stmt is not None: - nodelist.append(("stmt", self.stmt)) - return tuple(nodelist) - - def __iter__(self): - if self.stmt is not None: - yield self.stmt - - attr_names = ("name",) - - -class NamedInitializer(Node): - __slots__ = ("name", "expr", "coord", "__weakref__") - - def __init__(self, name, expr, coord=None): - self.name = name - self.expr = expr - self.coord = coord - - def children(self): - nodelist = [] - if self.expr is not None: - nodelist.append(("expr", self.expr)) - for i, child in enumerate(self.name or []): - nodelist.append(("name[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - if self.expr is not None: - yield self.expr - for child in self.name or []: - yield child - - attr_names = () - - -class ParamList(Node): - __slots__ = ("params", "coord", "__weakref__") - - def __init__(self, params, coord=None): - self.params = params - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.params or []): - nodelist.append(("params[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.params or []: - yield child - - attr_names = () - - -class PtrDecl(Node): - __slots__ = ("quals", "type", "coord", "__weakref__") - - def __init__(self, quals, type, coord=None): - self.quals = quals - self.type = type - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - - attr_names = ("quals",) - - -class Return(Node): - __slots__ = ("expr", "coord", "__weakref__") - - def __init__(self, expr, coord=None): - self.expr = expr - self.coord = coord - - def children(self): - nodelist = [] - if self.expr is not None: - nodelist.append(("expr", self.expr)) - return tuple(nodelist) - - def __iter__(self): - if self.expr is not None: - yield self.expr - - attr_names = () - - -class Struct(Node): - __slots__ = ("name", "decls", "coord", "__weakref__") - - def __init__(self, name, decls, coord=None): - self.name = name - self.decls = decls - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.decls or []): - nodelist.append(("decls[%d]" % i, child)) - return tuple(nodelist) - - def __iter__(self): - for child in self.decls or []: - yield child - - attr_names = ("name",) - - -class StructRef(Node): - __slots__ = ("name", "type", "field", "coord", "__weakref__") - - def __init__(self, name, type, field, coord=None): - self.name = name - self.type = type - self.field = field - self.coord = coord - - def children(self): - nodelist = [] - if self.name is not None: - nodelist.append(("name", self.name)) - if self.field is not None: - nodelist.append(("field", self.field)) - return tuple(nodelist) - - def __iter__(self): - if self.name is not None: - yield self.name - if 
self.field is not None: - yield self.field - - attr_names = ("type",) - - -class Switch(Node): - __slots__ = ("cond", "stmt", "coord", "__weakref__") - - def __init__(self, cond, stmt, coord=None): - self.cond = cond - self.stmt = stmt - self.coord = coord - - def children(self): - nodelist = [] - if self.cond is not None: - nodelist.append(("cond", self.cond)) - if self.stmt is not None: - nodelist.append(("stmt", self.stmt)) - return tuple(nodelist) - - def __iter__(self): - if self.cond is not None: - yield self.cond - if self.stmt is not None: - yield self.stmt - - attr_names = () - - -class TernaryOp(Node): - __slots__ = ("cond", "iftrue", "iffalse", "coord", "__weakref__") - - def __init__(self, cond, iftrue, iffalse, coord=None): - self.cond = cond - self.iftrue = iftrue - self.iffalse = iffalse - self.coord = coord - - def children(self): - nodelist = [] - if self.cond is not None: - nodelist.append(("cond", self.cond)) - if self.iftrue is not None: - nodelist.append(("iftrue", self.iftrue)) - if self.iffalse is not None: - nodelist.append(("iffalse", self.iffalse)) - return tuple(nodelist) - - def __iter__(self): - if self.cond is not None: - yield self.cond - if self.iftrue is not None: - yield self.iftrue - if self.iffalse is not None: - yield self.iffalse - - attr_names = () - - -class TypeDecl(Node): - __slots__ = ("declname", "quals", "type", "coord", "__weakref__") - - def __init__(self, declname, quals, type, coord=None): - self.declname = declname - self.quals = quals - self.type = type - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - - attr_names = ( - "declname", - "quals", - ) - - -class Typedef(Node): - __slots__ = ("name", "quals", "storage", "type", "coord", "__weakref__") - - def __init__(self, name, quals, storage, type, coord=None): - self.name = name - self.quals = quals - self.storage = storage - self.type = type - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - - attr_names = ( - "name", - "quals", - "storage", - ) - - -class Typename(Node): - __slots__ = ("name", "quals", "type", "coord", "__weakref__") - - def __init__(self, name, quals, type, coord=None): - self.name = name - self.quals = quals - self.type = type - self.coord = coord - - def children(self): - nodelist = [] - if self.type is not None: - nodelist.append(("type", self.type)) - return tuple(nodelist) - - def __iter__(self): - if self.type is not None: - yield self.type - - attr_names = ( - "name", - "quals", - ) - - -class UnaryOp(Node): - __slots__ = ("op", "expr", "coord", "__weakref__") - - def __init__(self, op, expr, coord=None): - self.op = op - self.expr = expr - self.coord = coord - - def children(self): - nodelist = [] - if self.expr is not None: - nodelist.append(("expr", self.expr)) - return tuple(nodelist) - - def __iter__(self): - if self.expr is not None: - yield self.expr - - attr_names = ("op",) - - -class Union(Node): - __slots__ = ("name", "decls", "coord", "__weakref__") - - def __init__(self, name, decls, coord=None): - self.name = name - self.decls = decls - self.coord = coord - - def children(self): - nodelist = [] - for i, child in enumerate(self.decls or []): - nodelist.append(("decls[%d]" % i, child)) - return tuple(nodelist) 
- - def __iter__(self): - for child in self.decls or []: - yield child - - attr_names = ("name",) - - -class While(Node): - __slots__ = ("cond", "stmt", "coord", "__weakref__") - - def __init__(self, cond, stmt, coord=None): - self.cond = cond - self.stmt = stmt - self.coord = coord - - def children(self): - nodelist = [] - if self.cond is not None: - nodelist.append(("cond", self.cond)) - if self.stmt is not None: - nodelist.append(("stmt", self.stmt)) - return tuple(nodelist) - - def __iter__(self): - if self.cond is not None: - yield self.cond - if self.stmt is not None: - yield self.stmt - - attr_names = () - - -class Pragma(Node): - __slots__ = ("string", "coord", "__weakref__") - - def __init__(self, string, coord=None): - self.string = string - self.coord = coord - - def children(self): - nodelist = [] - return tuple(nodelist) - - def __iter__(self): - return - yield - - attr_names = ("string",) diff --git a/src/pycparser/c_ast.pyc b/src/pycparser/c_ast.pyc deleted file mode 100644 index 78d0d43a..00000000 Binary files a/src/pycparser/c_ast.pyc and /dev/null differ diff --git a/src/pycparser/c_generator.py b/src/pycparser/c_generator.py deleted file mode 100755 index 8342266b..00000000 --- a/src/pycparser/c_generator.py +++ /dev/null @@ -1,452 +0,0 @@ -# ------------------------------------------------------------------------------ -# pycparser: c_generator.py -# -# C code generator from pycparser AST nodes. -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ------------------------------------------------------------------------------ -from . import c_ast - - -class CGenerator(object): - """Uses the same visitor pattern as c_ast.NodeVisitor, but modified to - return a value from each visit method, using string accumulation in - generic_visit. - """ - - def __init__(self): - # Statements start with indentation of self.indent_level spaces, using - # the _make_indent method - # - self.indent_level = 0 - - def _make_indent(self): - return " " * self.indent_level - - def visit(self, node): - method = "visit_" + node.__class__.__name__ - return getattr(self, method, self.generic_visit)(node) - - def generic_visit(self, node): - # ~ print('generic:', type(node)) - if node is None: - return "" - else: - return "".join(self.visit(c) for c_name, c in node.children()) - - def visit_Constant(self, n): - return n.value - - def visit_ID(self, n): - return n.name - - def visit_Pragma(self, n): - ret = "#pragma" - if n.string: - ret += " " + n.string - return ret - - def visit_ArrayRef(self, n): - arrref = self._parenthesize_unless_simple(n.name) - return arrref + "[" + self.visit(n.subscript) + "]" - - def visit_StructRef(self, n): - sref = self._parenthesize_unless_simple(n.name) - return sref + n.type + self.visit(n.field) - - def visit_FuncCall(self, n): - fref = self._parenthesize_unless_simple(n.name) - return fref + "(" + self.visit(n.args) + ")" - - def visit_UnaryOp(self, n): - operand = self._parenthesize_unless_simple(n.expr) - if n.op == "p++": - return "%s++" % operand - elif n.op == "p--": - return "%s--" % operand - elif n.op == "sizeof": - # Always parenthesize the argument of sizeof since it can be - # a name. 
- return "sizeof(%s)" % self.visit(n.expr) - else: - return "%s%s" % (n.op, operand) - - def visit_BinaryOp(self, n): - lval_str = self._parenthesize_if(n.left, lambda d: not self._is_simple_node(d)) - rval_str = self._parenthesize_if(n.right, lambda d: not self._is_simple_node(d)) - return "%s %s %s" % (lval_str, n.op, rval_str) - - def visit_Assignment(self, n): - rval_str = self._parenthesize_if( - n.rvalue, lambda n: isinstance(n, c_ast.Assignment) - ) - return "%s %s %s" % (self.visit(n.lvalue), n.op, rval_str) - - def visit_IdentifierType(self, n): - return " ".join(n.names) - - def _visit_expr(self, n): - if isinstance(n, c_ast.InitList): - return "{" + self.visit(n) + "}" - elif isinstance(n, c_ast.ExprList): - return "(" + self.visit(n) + ")" - else: - return self.visit(n) - - def visit_Decl(self, n, no_type=False): - # no_type is used when a Decl is part of a DeclList, where the type is - # explicitly only for the first declaration in a list. - # - s = n.name if no_type else self._generate_decl(n) - if n.bitsize: - s += " : " + self.visit(n.bitsize) - if n.init: - s += " = " + self._visit_expr(n.init) - return s - - def visit_DeclList(self, n): - s = self.visit(n.decls[0]) - if len(n.decls) > 1: - s += ", " + ", ".join( - self.visit_Decl(decl, no_type=True) for decl in n.decls[1:] - ) - return s - - def visit_Typedef(self, n): - s = "" - if n.storage: - s += " ".join(n.storage) + " " - s += self._generate_type(n.type) - return s - - def visit_Cast(self, n): - s = "(" + self._generate_type(n.to_type) + ")" - return s + " " + self._parenthesize_unless_simple(n.expr) - - def visit_ExprList(self, n): - visited_subexprs = [] - for expr in n.exprs: - visited_subexprs.append(self._visit_expr(expr)) - return ", ".join(visited_subexprs) - - def visit_InitList(self, n): - visited_subexprs = [] - for expr in n.exprs: - visited_subexprs.append(self._visit_expr(expr)) - return ", ".join(visited_subexprs) - - def visit_Enum(self, n): - return self._generate_struct_union_enum(n, name="enum") - - def visit_Enumerator(self, n): - if not n.value: - return "{indent}{name},\n".format( - indent=self._make_indent(), - name=n.name, - ) - else: - return "{indent}{name} = {value},\n".format( - indent=self._make_indent(), - name=n.name, - value=self.visit(n.value), - ) - - def visit_FuncDef(self, n): - decl = self.visit(n.decl) - self.indent_level = 0 - body = self.visit(n.body) - if n.param_decls: - knrdecls = ";\n".join(self.visit(p) for p in n.param_decls) - return decl + "\n" + knrdecls + ";\n" + body + "\n" - else: - return decl + "\n" + body + "\n" - - def visit_FileAST(self, n): - s = "" - for ext in n.ext: - if isinstance(ext, c_ast.FuncDef): - s += self.visit(ext) - elif isinstance(ext, c_ast.Pragma): - s += self.visit(ext) + "\n" - else: - s += self.visit(ext) + ";\n" - return s - - def visit_Compound(self, n): - s = self._make_indent() + "{\n" - self.indent_level += 2 - if n.block_items: - s += "".join(self._generate_stmt(stmt) for stmt in n.block_items) - self.indent_level -= 2 - s += self._make_indent() + "}\n" - return s - - def visit_CompoundLiteral(self, n): - return "(" + self.visit(n.type) + "){" + self.visit(n.init) + "}" - - def visit_EmptyStatement(self, n): - return ";" - - def visit_ParamList(self, n): - return ", ".join(self.visit(param) for param in n.params) - - def visit_Return(self, n): - s = "return" - if n.expr: - s += " " + self.visit(n.expr) - return s + ";" - - def visit_Break(self, n): - return "break;" - - def visit_Continue(self, n): - return "continue;" - - def 
visit_TernaryOp(self, n): - s = "(" + self._visit_expr(n.cond) + ") ? " - s += "(" + self._visit_expr(n.iftrue) + ") : " - s += "(" + self._visit_expr(n.iffalse) + ")" - return s - - def visit_If(self, n): - s = "if (" - if n.cond: - s += self.visit(n.cond) - s += ")\n" - s += self._generate_stmt(n.iftrue, add_indent=True) - if n.iffalse: - s += self._make_indent() + "else\n" - s += self._generate_stmt(n.iffalse, add_indent=True) - return s - - def visit_For(self, n): - s = "for (" - if n.init: - s += self.visit(n.init) - s += ";" - if n.cond: - s += " " + self.visit(n.cond) - s += ";" - if n.next: - s += " " + self.visit(n.next) - s += ")\n" - s += self._generate_stmt(n.stmt, add_indent=True) - return s - - def visit_While(self, n): - s = "while (" - if n.cond: - s += self.visit(n.cond) - s += ")\n" - s += self._generate_stmt(n.stmt, add_indent=True) - return s - - def visit_DoWhile(self, n): - s = "do\n" - s += self._generate_stmt(n.stmt, add_indent=True) - s += self._make_indent() + "while (" - if n.cond: - s += self.visit(n.cond) - s += ");" - return s - - def visit_Switch(self, n): - s = "switch (" + self.visit(n.cond) + ")\n" - s += self._generate_stmt(n.stmt, add_indent=True) - return s - - def visit_Case(self, n): - s = "case " + self.visit(n.expr) + ":\n" - for stmt in n.stmts: - s += self._generate_stmt(stmt, add_indent=True) - return s - - def visit_Default(self, n): - s = "default:\n" - for stmt in n.stmts: - s += self._generate_stmt(stmt, add_indent=True) - return s - - def visit_Label(self, n): - return n.name + ":\n" + self._generate_stmt(n.stmt) - - def visit_Goto(self, n): - return "goto " + n.name + ";" - - def visit_EllipsisParam(self, n): - return "..." - - def visit_Struct(self, n): - return self._generate_struct_union_enum(n, "struct") - - def visit_Typename(self, n): - return self._generate_type(n.type) - - def visit_Union(self, n): - return self._generate_struct_union_enum(n, "union") - - def visit_NamedInitializer(self, n): - s = "" - for name in n.name: - if isinstance(name, c_ast.ID): - s += "." + name.name - else: - s += "[" + self.visit(name) + "]" - s += " = " + self._visit_expr(n.expr) - return s - - def visit_FuncDecl(self, n): - return self._generate_type(n) - - def _generate_struct_union_enum(self, n, name): - """Generates code for structs, unions, and enums. name should be - 'struct', 'union', or 'enum'. - """ - if name in ("struct", "union"): - members = n.decls - body_function = self._generate_struct_union_body - else: - assert name == "enum" - members = None if n.values is None else n.values.enumerators - body_function = self._generate_enum_body - s = name + " " + (n.name or "") - if members is not None: - # None means no members - # Empty sequence means an empty list of members - s += "\n" - s += self._make_indent() - self.indent_level += 2 - s += "{\n" - s += body_function(members) - self.indent_level -= 2 - s += self._make_indent() + "}" - return s - - def _generate_struct_union_body(self, members): - return "".join(self._generate_stmt(decl) for decl in members) - - def _generate_enum_body(self, members): - # `[:-2] + '\n'` removes the final `,` from the enumerator list - return "".join(self.visit(value) for value in members)[:-2] + "\n" - - def _generate_stmt(self, n, add_indent=False): - """Generation from a statement node. This method exists as a wrapper - for individual visit_* methods to handle different treatment of - some statements in this context. 
- """ - typ = type(n) - if add_indent: - self.indent_level += 2 - indent = self._make_indent() - if add_indent: - self.indent_level -= 2 - - if typ in ( - c_ast.Decl, - c_ast.Assignment, - c_ast.Cast, - c_ast.UnaryOp, - c_ast.BinaryOp, - c_ast.TernaryOp, - c_ast.FuncCall, - c_ast.ArrayRef, - c_ast.StructRef, - c_ast.Constant, - c_ast.ID, - c_ast.Typedef, - c_ast.ExprList, - ): - # These can also appear in an expression context so no semicolon - # is added to them automatically - # - return indent + self.visit(n) + ";\n" - elif typ in (c_ast.Compound,): - # No extra indentation required before the opening brace of a - # compound - because it consists of multiple lines it has to - # compute its own indentation. - # - return self.visit(n) - else: - return indent + self.visit(n) + "\n" - - def _generate_decl(self, n): - """Generation from a Decl node.""" - s = "" - if n.funcspec: - s = " ".join(n.funcspec) + " " - if n.storage: - s += " ".join(n.storage) + " " - s += self._generate_type(n.type) - return s - - def _generate_type(self, n, modifiers=[]): - """Recursive generation from a type node. n is the type node. - modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers - encountered on the way down to a TypeDecl, to allow proper - generation from it. - """ - typ = type(n) - # ~ print(n, modifiers) - - if typ == c_ast.TypeDecl: - s = "" - if n.quals: - s += " ".join(n.quals) + " " - s += self.visit(n.type) - - nstr = n.declname if n.declname else "" - # Resolve modifiers. - # Wrap in parens to distinguish pointer to array and pointer to - # function syntax. - # - for i, modifier in enumerate(modifiers): - if isinstance(modifier, c_ast.ArrayDecl): - if i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl): - nstr = "(" + nstr + ")" - nstr += "[" + self.visit(modifier.dim) + "]" - elif isinstance(modifier, c_ast.FuncDecl): - if i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl): - nstr = "(" + nstr + ")" - nstr += "(" + self.visit(modifier.args) + ")" - elif isinstance(modifier, c_ast.PtrDecl): - if modifier.quals: - nstr = "* %s %s" % (" ".join(modifier.quals), nstr) - else: - nstr = "*" + nstr - if nstr: - s += " " + nstr - return s - elif typ == c_ast.Decl: - return self._generate_decl(n.type) - elif typ == c_ast.Typename: - return self._generate_type(n.type) - elif typ == c_ast.IdentifierType: - return " ".join(n.names) + " " - elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl): - return self._generate_type(n.type, modifiers + [n]) - else: - return self.visit(n) - - def _parenthesize_if(self, n, condition): - """Visits 'n' and returns its string representation, parenthesized - if the condition function applied to the node returns True. - """ - s = self._visit_expr(n) - if condition(n): - return "(" + s + ")" - else: - return s - - def _parenthesize_unless_simple(self, n): - """Common use case for _parenthesize_if""" - return self._parenthesize_if(n, lambda d: not self._is_simple_node(d)) - - def _is_simple_node(self, n): - """Returns True for nodes that are "simple" - i.e. nodes that always - have higher precedence than operators. 
- """ - return isinstance( - n, - (c_ast.Constant, c_ast.ID, c_ast.ArrayRef, c_ast.StructRef, c_ast.FuncCall), - ) diff --git a/src/pycparser/c_generator.pyc b/src/pycparser/c_generator.pyc deleted file mode 100644 index a8da8c5a..00000000 Binary files a/src/pycparser/c_generator.pyc and /dev/null differ diff --git a/src/pycparser/c_lexer.py b/src/pycparser/c_lexer.py deleted file mode 100755 index d6910787..00000000 --- a/src/pycparser/c_lexer.py +++ /dev/null @@ -1,567 +0,0 @@ -# ------------------------------------------------------------------------------ -# pycparser: c_lexer.py -# -# CLexer class: lexer for the C language -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ------------------------------------------------------------------------------ -import re -import sys - -from .ply import lex -from .ply.lex import TOKEN - - -class CLexer(object): - """A lexer for the C language. After building it, set the - input text with input(), and call token() to get new - tokens. - - The public attribute filename can be set to an initial - filaneme, but the lexer will update it upon #line - directives. - """ - - def __init__(self, error_func, on_lbrace_func, on_rbrace_func, type_lookup_func): - """Create a new Lexer. - - error_func: - An error function. Will be called with an error - message, line and column as arguments, in case of - an error during lexing. - - on_lbrace_func, on_rbrace_func: - Called when an LBRACE or RBRACE is encountered - (likely to push/pop type_lookup_func's scope) - - type_lookup_func: - A type lookup function. Given a string, it must - return True IFF this string is a name of a type - that was defined with a typedef earlier. - """ - self.error_func = error_func - self.on_lbrace_func = on_lbrace_func - self.on_rbrace_func = on_rbrace_func - self.type_lookup_func = type_lookup_func - self.filename = "" - - # Keeps track of the last token returned from self.token() - self.last_token = None - - # Allow either "# line" or "# " to support GCC's - # cpp output - # - self.line_pattern = re.compile(r"([ \t]*line\W)|([ \t]*\d+)") - self.pragma_pattern = re.compile(r"[ \t]*pragma\W") - - def build(self, **kwargs): - """Builds the lexer from the specification. Must be - called after the lexer object is created. 
- - This method exists separately, because the PLY - manual warns against calling lex.lex inside - __init__ - """ - self.lexer = lex.lex(object=self, **kwargs) - - def reset_lineno(self): - """Resets the internal line number counter of the lexer.""" - self.lexer.lineno = 1 - - def input(self, text): - self.lexer.input(text) - - def token(self): - self.last_token = self.lexer.token() - return self.last_token - - def find_tok_column(self, token): - """Find the column of the token in its line.""" - last_cr = self.lexer.lexdata.rfind("\n", 0, token.lexpos) - return token.lexpos - last_cr - - ######################-- PRIVATE --###################### - - ## - ## Internal auxiliary methods - ## - def _error(self, msg, token): - location = self._make_tok_location(token) - self.error_func(msg, location[0], location[1]) - self.lexer.skip(1) - - def _make_tok_location(self, token): - return (token.lineno, self.find_tok_column(token)) - - ## - ## Reserved keywords - ## - keywords = ( - "_BOOL", - "_COMPLEX", - "AUTO", - "BREAK", - "CASE", - "CHAR", - "CONST", - "CONTINUE", - "DEFAULT", - "DO", - "DOUBLE", - "ELSE", - "ENUM", - "EXTERN", - "FLOAT", - "FOR", - "GOTO", - "IF", - "INLINE", - "INT", - "LONG", - "REGISTER", - "OFFSETOF", - "RESTRICT", - "RETURN", - "SHORT", - "SIGNED", - "SIZEOF", - "STATIC", - "STRUCT", - "SWITCH", - "TYPEDEF", - "UNION", - "UNSIGNED", - "VOID", - "VOLATILE", - "WHILE", - "__INT128", - ) - - keyword_map = {} - for keyword in keywords: - if keyword == "_BOOL": - keyword_map["_Bool"] = keyword - elif keyword == "_COMPLEX": - keyword_map["_Complex"] = keyword - else: - keyword_map[keyword.lower()] = keyword - - ## - ## All the tokens recognized by the lexer - ## - tokens = keywords + ( - # Identifiers - "ID", - # Type identifiers (identifiers previously defined as - # types with typedef) - "TYPEID", - # constants - "INT_CONST_DEC", - "INT_CONST_OCT", - "INT_CONST_HEX", - "INT_CONST_BIN", - "FLOAT_CONST", - "HEX_FLOAT_CONST", - "CHAR_CONST", - "WCHAR_CONST", - # String literals - "STRING_LITERAL", - "WSTRING_LITERAL", - # Operators - "PLUS", - "MINUS", - "TIMES", - "DIVIDE", - "MOD", - "OR", - "AND", - "NOT", - "XOR", - "LSHIFT", - "RSHIFT", - "LOR", - "LAND", - "LNOT", - "LT", - "LE", - "GT", - "GE", - "EQ", - "NE", - # Assignment - "EQUALS", - "TIMESEQUAL", - "DIVEQUAL", - "MODEQUAL", - "PLUSEQUAL", - "MINUSEQUAL", - "LSHIFTEQUAL", - "RSHIFTEQUAL", - "ANDEQUAL", - "XOREQUAL", - "OREQUAL", - # Increment/decrement - "PLUSPLUS", - "MINUSMINUS", - # Structure dereference (->) - "ARROW", - # Conditional operator (?) - "CONDOP", - # Delimeters - "LPAREN", - "RPAREN", # ( ) - "LBRACKET", - "RBRACKET", # [ ] - "LBRACE", - "RBRACE", # { } - "COMMA", - "PERIOD", # . , - "SEMI", - "COLON", # ; : - # Ellipsis (...) - "ELLIPSIS", - # pre-processor - "PPHASH", # '#' - "PPPRAGMA", # 'pragma' - "PPPRAGMASTR", - ) - - ## - ## Regexes for use in tokens - ## - ## - - # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers) - identifier = r"[a-zA-Z_$][0-9a-zA-Z_$]*" - - hex_prefix = "0[xX]" - hex_digits = "[0-9a-fA-F]+" - bin_prefix = "0[bB]" - bin_digits = "[01]+" - - # integer constants (K&R2: A.2.5.1) - integer_suffix_opt = ( - r"(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?" 
- ) - decimal_constant = ( - "(0" + integer_suffix_opt + ")|([1-9][0-9]*" + integer_suffix_opt + ")" - ) - octal_constant = "0[0-7]*" + integer_suffix_opt - hex_constant = hex_prefix + hex_digits + integer_suffix_opt - bin_constant = bin_prefix + bin_digits + integer_suffix_opt - - bad_octal_constant = "0[0-7]*[89]" - - # character constants (K&R2: A.2.5.2) - # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line - # directives with Windows paths as filenames (..\..\dir\file) - # For the same reason, decimal_escape allows all digit sequences. We want to - # parse all correct code, even if it means to sometimes parse incorrect - # code. - # - simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" - decimal_escape = r"""(\d+)""" - hex_escape = r"""(x[0-9a-fA-F]+)""" - bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])""" - - escape_sequence = ( - r"""(\\(""" + simple_escape + "|" + decimal_escape + "|" + hex_escape + "))" - ) - cconst_char = r"""([^'\\\n]|""" + escape_sequence + ")" - char_const = "'" + cconst_char + "'" - wchar_const = "L" + char_const - unmatched_quote = "('" + cconst_char + "*\\n)|('" + cconst_char + "*$)" - bad_char_const = ( - r"""('""" - + cconst_char - + """[^'\n]+')|('')|('""" - + bad_escape - + r"""[^'\n]*')""" - ) - - # string literals (K&R2: A.2.6) - string_char = r"""([^"\\\n]|""" + escape_sequence + ")" - string_literal = '"' + string_char + '*"' - wstring_literal = "L" + string_literal - bad_string_literal = '"' + string_char + "*?" + bad_escape + string_char + '*"' - - # floating constants (K&R2: A.2.5.3) - exponent_part = r"""([eE][-+]?[0-9]+)""" - fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)""" - floating_constant = ( - "((((" - + fractional_constant - + ")" - + exponent_part - + "?)|([0-9]+" - + exponent_part - + "))[FfLl]?)" - ) - binary_exponent_part = r"""([pP][+-]?[0-9]+)""" - hex_fractional_constant = ( - "(((" + hex_digits + r""")?\.""" + hex_digits + ")|(" + hex_digits + r"""\.))""" - ) - hex_floating_constant = ( - "(" - + hex_prefix - + "(" - + hex_digits - + "|" - + hex_fractional_constant - + ")" - + binary_exponent_part - + "[FfLl]?)" - ) - - ## - ## Lexer states: used for preprocessor \n-terminated directives - ## - states = ( - # ppline: preprocessor line directives - # - ("ppline", "exclusive"), - # pppragma: pragma - # - ("pppragma", "exclusive"), - ) - - def t_PPHASH(self, t): - r"[ \t]*\#" - if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos): - t.lexer.begin("ppline") - self.pp_line = self.pp_filename = None - elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos): - t.lexer.begin("pppragma") - else: - t.type = "PPHASH" - return t - - ## - ## Rules for the ppline state - ## - @TOKEN(string_literal) - def t_ppline_FILENAME(self, t): - if self.pp_line is None: - self._error("filename before line number in #line", t) - else: - self.pp_filename = t.value.lstrip('"').rstrip('"') - - @TOKEN(decimal_constant) - def t_ppline_LINE_NUMBER(self, t): - if self.pp_line is None: - self.pp_line = t.value - else: - # Ignore: GCC's cpp sometimes inserts a numeric flag - # after the file name - pass - - def t_ppline_NEWLINE(self, t): - r"\n" - if self.pp_line is None: - self._error("line number missing in #line", t) - else: - self.lexer.lineno = int(self.pp_line) - - if self.pp_filename is not None: - self.filename = self.pp_filename - - t.lexer.begin("INITIAL") - - def t_ppline_PPLINE(self, t): - r"line" - pass - - t_ppline_ignore = " \t" - - def t_ppline_error(self, t): - self._error("invalid #line 
directive", t) - - ## - ## Rules for the pppragma state - ## - def t_pppragma_NEWLINE(self, t): - r"\n" - t.lexer.lineno += 1 - t.lexer.begin("INITIAL") - - def t_pppragma_PPPRAGMA(self, t): - r"pragma" - return t - - t_pppragma_ignore = " \t" - - def t_pppragma_STR(self, t): - ".+" - t.type = "PPPRAGMASTR" - return t - - def t_pppragma_error(self, t): - self._error("invalid #pragma directive", t) - - ## - ## Rules for the normal state - ## - t_ignore = " \t" - - # Newlines - def t_NEWLINE(self, t): - r"\n+" - t.lexer.lineno += t.value.count("\n") - - # Operators - t_PLUS = r"\+" - t_MINUS = r"-" - t_TIMES = r"\*" - t_DIVIDE = r"/" - t_MOD = r"%" - t_OR = r"\|" - t_AND = r"&" - t_NOT = r"~" - t_XOR = r"\^" - t_LSHIFT = r"<<" - t_RSHIFT = r">>" - t_LOR = r"\|\|" - t_LAND = r"&&" - t_LNOT = r"!" - t_LT = r"<" - t_GT = r">" - t_LE = r"<=" - t_GE = r">=" - t_EQ = r"==" - t_NE = r"!=" - - # Assignment operators - t_EQUALS = r"=" - t_TIMESEQUAL = r"\*=" - t_DIVEQUAL = r"/=" - t_MODEQUAL = r"%=" - t_PLUSEQUAL = r"\+=" - t_MINUSEQUAL = r"-=" - t_LSHIFTEQUAL = r"<<=" - t_RSHIFTEQUAL = r">>=" - t_ANDEQUAL = r"&=" - t_OREQUAL = r"\|=" - t_XOREQUAL = r"\^=" - - # Increment/decrement - t_PLUSPLUS = r"\+\+" - t_MINUSMINUS = r"--" - - # -> - t_ARROW = r"->" - - # ? - t_CONDOP = r"\?" - - # Delimeters - t_LPAREN = r"\(" - t_RPAREN = r"\)" - t_LBRACKET = r"\[" - t_RBRACKET = r"\]" - t_COMMA = r"," - t_PERIOD = r"\." - t_SEMI = r";" - t_COLON = r":" - t_ELLIPSIS = r"\.\.\." - - # Scope delimiters - # To see why on_lbrace_func is needed, consider: - # typedef char TT; - # void foo(int TT) { TT = 10; } - # TT x = 5; - # Outside the function, TT is a typedef, but inside (starting and ending - # with the braces) it's a parameter. The trouble begins with yacc's - # lookahead token. If we open a new scope in brace_open, then TT has - # already been read and incorrectly interpreted as TYPEID. So, we need - # to open and close scopes from within the lexer. - # Similar for the TT immediately outside the end of the function. 
- # - @TOKEN(r"\{") - def t_LBRACE(self, t): - self.on_lbrace_func() - return t - - @TOKEN(r"\}") - def t_RBRACE(self, t): - self.on_rbrace_func() - return t - - t_STRING_LITERAL = string_literal - - # The following floating and integer constants are defined as - # functions to impose a strict order (otherwise, decimal - # is placed before the others because its regex is longer, - # and this is bad) - # - @TOKEN(floating_constant) - def t_FLOAT_CONST(self, t): - return t - - @TOKEN(hex_floating_constant) - def t_HEX_FLOAT_CONST(self, t): - return t - - @TOKEN(hex_constant) - def t_INT_CONST_HEX(self, t): - return t - - @TOKEN(bin_constant) - def t_INT_CONST_BIN(self, t): - return t - - @TOKEN(bad_octal_constant) - def t_BAD_CONST_OCT(self, t): - msg = "Invalid octal constant" - self._error(msg, t) - - @TOKEN(octal_constant) - def t_INT_CONST_OCT(self, t): - return t - - @TOKEN(decimal_constant) - def t_INT_CONST_DEC(self, t): - return t - - # Must come before bad_char_const, to prevent it from - # catching valid char constants as invalid - # - @TOKEN(char_const) - def t_CHAR_CONST(self, t): - return t - - @TOKEN(wchar_const) - def t_WCHAR_CONST(self, t): - return t - - @TOKEN(unmatched_quote) - def t_UNMATCHED_QUOTE(self, t): - msg = "Unmatched '" - self._error(msg, t) - - @TOKEN(bad_char_const) - def t_BAD_CHAR_CONST(self, t): - msg = "Invalid char constant %s" % t.value - self._error(msg, t) - - @TOKEN(wstring_literal) - def t_WSTRING_LITERAL(self, t): - return t - - # unmatched string literals are caught by the preprocessor - - @TOKEN(bad_string_literal) - def t_BAD_STRING_LITERAL(self, t): - msg = "String contains invalid escape code" - self._error(msg, t) - - @TOKEN(identifier) - def t_ID(self, t): - t.type = self.keyword_map.get(t.value, "ID") - if t.type == "ID" and self.type_lookup_func(t.value): - t.type = "TYPEID" - return t - - def t_error(self, t): - msg = "Illegal character %s" % repr(t.value[0]) - self._error(msg, t) diff --git a/src/pycparser/c_lexer.pyc b/src/pycparser/c_lexer.pyc deleted file mode 100644 index 4bc6c688..00000000 Binary files a/src/pycparser/c_lexer.pyc and /dev/null differ diff --git a/src/pycparser/c_parser.py b/src/pycparser/c_parser.py deleted file mode 100755 index e7bac0c8..00000000 --- a/src/pycparser/c_parser.py +++ /dev/null @@ -1,1752 +0,0 @@ -# ------------------------------------------------------------------------------ -# pycparser: c_parser.py -# -# CParser class: Parser and AST builder for the C language -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ------------------------------------------------------------------------------ -import re - -from . import c_ast -from .ast_transforms import fix_switch_cases -from .c_lexer import CLexer -from .ply import yacc -from .plyparser import Coord, ParseError, PLYParser, parameterized, template - - -@template -class CParser(PLYParser): - def __init__( - self, - lex_optimize=True, - lexer=CLexer, - lextab="pycparser.lextab", - yacc_optimize=True, - yacctab="pycparser.yacctab", - yacc_debug=False, - taboutputdir="", - ): - """Create a new CParser. - - Some arguments for controlling the debug/optimization - level of the parser are provided. The defaults are - tuned for release/performance mode. - The simple rules for using them are: - *) When tweaking CParser/CLexer, set these to False - *) When releasing a stable parser, set to True - - lex_optimize: - Set to False when you're modifying the lexer. - Otherwise, changes in the lexer won't be used, if - some lextab.py file exists. 
- When releasing with a stable lexer, set to True - to save the re-generation of the lexer table on - each run. - - lexer: - Set this parameter to define the lexer to use if - you're not using the default CLexer. - - lextab: - Points to the lex table that's used for optimized - mode. Only if you're modifying the lexer and want - some tests to avoid re-generating the table, make - this point to a local lex table file (that's been - earlier generated with lex_optimize=True) - - yacc_optimize: - Set to False when you're modifying the parser. - Otherwise, changes in the parser won't be used, if - some parsetab.py file exists. - When releasing with a stable parser, set to True - to save the re-generation of the parser table on - each run. - - yacctab: - Points to the yacc table that's used for optimized - mode. Only if you're modifying the parser, make - this point to a local yacc table file - - yacc_debug: - Generate a parser.out file that explains how yacc - built the parsing table from the grammar. - - taboutputdir: - Set this parameter to control the location of generated - lextab and yacctab files. - """ - self.clex = lexer( - error_func=self._lex_error_func, - on_lbrace_func=self._lex_on_lbrace_func, - on_rbrace_func=self._lex_on_rbrace_func, - type_lookup_func=self._lex_type_lookup_func, - ) - - self.clex.build(optimize=lex_optimize, lextab=lextab, outputdir=taboutputdir) - self.tokens = self.clex.tokens - - rules_with_opt = [ - "abstract_declarator", - "assignment_expression", - "declaration_list", - "declaration_specifiers_no_type", - "designation", - "expression", - "identifier_list", - "init_declarator_list", - "id_init_declarator_list", - "initializer_list", - "parameter_type_list", - "block_item_list", - "type_qualifier_list", - "struct_declarator_list", - ] - - for rule in rules_with_opt: - self._create_opt_rule(rule) - - self.cparser = yacc.yacc( - module=self, - start="translation_unit_or_empty", - debug=yacc_debug, - optimize=yacc_optimize, - tabmodule=yacctab, - outputdir=taboutputdir, - ) - - # Stack of scopes for keeping track of symbols. _scope_stack[-1] is - # the current (topmost) scope. Each scope is a dictionary that - # specifies whether a name is a type. If _scope_stack[n][name] is - # True, 'name' is currently a type in the scope. If it's False, - # 'name' is used in the scope but not as a type (for instance, if we - # saw: int name; - # If 'name' is not a key in _scope_stack[n] then 'name' was not defined - # in this scope at all. - self._scope_stack = [dict()] - - # Keeps track of the last token given to yacc (the lookahead token) - self._last_yielded_token = None - - def parse(self, text, filename="", debuglevel=0): - """Parses C code and returns an AST. 
- - text: - A string containing the C source code - - filename: - Name of the file being parsed (for meaningful - error messages) - - debuglevel: - Debug level to yacc - """ - self.clex.filename = filename - self.clex.reset_lineno() - self._scope_stack = [dict()] - self._last_yielded_token = None - return self.cparser.parse(input=text, lexer=self.clex, debug=debuglevel) - - ######################-- PRIVATE --###################### - - def _push_scope(self): - self._scope_stack.append(dict()) - - def _pop_scope(self): - assert len(self._scope_stack) > 1 - self._scope_stack.pop() - - def _add_typedef_name(self, name, coord): - """Add a new typedef name (ie a TYPEID) to the current scope""" - if not self._scope_stack[-1].get(name, True): - self._parse_error( - "Typedef %r previously declared as non-typedef " "in this scope" % name, - coord, - ) - self._scope_stack[-1][name] = True - - def _add_identifier(self, name, coord): - """Add a new object, function, or enum member name (ie an ID) to the - current scope - """ - if self._scope_stack[-1].get(name, False): - self._parse_error( - "Non-typedef %r previously declared as typedef " "in this scope" % name, - coord, - ) - self._scope_stack[-1][name] = False - - def _is_type_in_scope(self, name): - """Is *name* a typedef-name in the current scope?""" - for scope in reversed(self._scope_stack): - # If name is an identifier in this scope it shadows typedefs in - # higher scopes. - in_scope = scope.get(name) - if in_scope is not None: - return in_scope - return False - - def _lex_error_func(self, msg, line, column): - self._parse_error(msg, self._coord(line, column)) - - def _lex_on_lbrace_func(self): - self._push_scope() - - def _lex_on_rbrace_func(self): - self._pop_scope() - - def _lex_type_lookup_func(self, name): - """Looks up types that were previously defined with - typedef. - Passed to the lexer for recognizing identifiers that - are types. - """ - is_type = self._is_type_in_scope(name) - return is_type - - def _get_yacc_lookahead_token(self): - """We need access to yacc's lookahead token in certain cases. - This is the last token yacc requested from the lexer, so we - ask the lexer. - """ - return self.clex.last_token - - # To understand what's going on here, read sections A.8.5 and - # A.8.6 of K&R2 very carefully. - # - # A C type consists of a basic type declaration, with a list - # of modifiers. For example: - # - # int *c[5]; - # - # The basic declaration here is 'int c', and the pointer and - # the array are the modifiers. - # - # Basic declarations are represented by TypeDecl (from module c_ast) and the - # modifiers are FuncDecl, PtrDecl and ArrayDecl. - # - # The standard states that whenever a new modifier is parsed, it should be - # added to the end of the list of modifiers. For example: - # - # K&R2 A.8.6.2: Array Declarators - # - # In a declaration T D where D has the form - # D1 [constant-expression-opt] - # and the type of the identifier in the declaration T D1 is - # "type-modifier T", the type of the - # identifier of D is "type-modifier array of T" - # - # This is what this method does. The declarator it receives - # can be a list of declarators ending with TypeDecl. It - # tacks the modifier to the end of this list, just before - # the TypeDecl. - # - # Additionally, the modifier may be a list itself. This is - # useful for pointers, that can come as a chain from the rule - # p_pointer. In this case, the whole modifier list is spliced - # into the new location. 
- def _type_modify_decl(self, decl, modifier): - """Tacks a type modifier on a declarator, and returns - the modified declarator. - - Note: the declarator and modifier may be modified - """ - # ~ print '****' - # ~ decl.show(offset=3) - # ~ modifier.show(offset=3) - # ~ print '****' - - modifier_head = modifier - modifier_tail = modifier - - # The modifier may be a nested list. Reach its tail. - # - while modifier_tail.type: - modifier_tail = modifier_tail.type - - # If the decl is a basic type, just tack the modifier onto - # it - # - if isinstance(decl, c_ast.TypeDecl): - modifier_tail.type = decl - return modifier - else: - # Otherwise, the decl is a list of modifiers. Reach - # its tail and splice the modifier onto the tail, - # pointing to the underlying basic type. - # - decl_tail = decl - - while not isinstance(decl_tail.type, c_ast.TypeDecl): - decl_tail = decl_tail.type - - modifier_tail.type = decl_tail.type - decl_tail.type = modifier_head - return decl - - # Due to the order in which declarators are constructed, - # they have to be fixed in order to look like a normal AST. - # - # When a declaration arrives from syntax construction, it has - # these problems: - # * The innermost TypeDecl has no type (because the basic - # type is only known at the uppermost declaration level) - # * The declaration has no variable name, since that is saved - # in the innermost TypeDecl - # * The typename of the declaration is a list of type - # specifiers, and not a node. Here, basic identifier types - # should be separated from more complex types like enums - # and structs. - # - # This method fixes these problems. - # - def _fix_decl_name_type(self, decl, typename): - """Fixes a declaration. Modifies decl.""" - # Reach the underlying basic type - # - type = decl - while not isinstance(type, c_ast.TypeDecl): - type = type.type - - decl.name = type.declname - type.quals = decl.quals - - # The typename is a list of types. If any type in this - # list isn't an IdentifierType, it must be the only - # type in the list (it's illegal to declare "int enum ..") - # If all the types are basic, they're collected in the - # IdentifierType holder. - # - for tn in typename: - if not isinstance(tn, c_ast.IdentifierType): - if len(typename) > 1: - self._parse_error("Invalid multiple types specified", tn.coord) - else: - type.type = tn - return decl - - if not typename: - # Functions default to returning int - # - if not isinstance(decl.type, c_ast.FuncDecl): - self._parse_error("Missing type in declaration", decl.coord) - type.type = c_ast.IdentifierType(["int"], coord=decl.coord) - else: - # At this point, we know that typename is a list of IdentifierType - # nodes. Concatenate all the names into a single list. - # - type.type = c_ast.IdentifierType( - [name for id in typename for name in id.names], coord=typename[0].coord - ) - return decl - - def _add_declaration_specifier(self, declspec, newspec, kind, append=False): - """Declaration specifiers are represented by a dictionary - with the entries: - * qual: a list of type qualifiers - * storage: a list of storage type qualifiers - * type: a list of type specifiers - * function: a list of function specifiers - - This method is given a declaration specifier, and a - new specifier of a given kind. - If `append` is True, the new specifier is added to the end of - the specifiers list, otherwise it's added at the beginning. - Returns the declaration specifier, with the new - specifier incorporated. 
- """ - spec = declspec or dict(qual=[], storage=[], type=[], function=[]) - - if append: - spec[kind].append(newspec) - else: - spec[kind].insert(0, newspec) - - return spec - - def _build_declarations(self, spec, decls, typedef_namespace=False): - """Builds a list of declarations all sharing the given specifiers. - If typedef_namespace is true, each declared name is added - to the "typedef namespace", which also includes objects, - functions, and enum constants. - """ - is_typedef = "typedef" in spec["storage"] - declarations = [] - - # Bit-fields are allowed to be unnamed. - # - if decls[0].get("bitsize") is not None: - pass - - # When redeclaring typedef names as identifiers in inner scopes, a - # problem can occur where the identifier gets grouped into - # spec['type'], leaving decl as None. This can only occur for the - # first declarator. - # - elif decls[0]["decl"] is None: - if ( - len(spec["type"]) < 2 - or len(spec["type"][-1].names) != 1 - or not self._is_type_in_scope(spec["type"][-1].names[0]) - ): - coord = "?" - for t in spec["type"]: - if hasattr(t, "coord"): - coord = t.coord - break - self._parse_error("Invalid declaration", coord) - - # Make this look as if it came from "direct_declarator:ID" - decls[0]["decl"] = c_ast.TypeDecl( - declname=spec["type"][-1].names[0], - type=None, - quals=None, - coord=spec["type"][-1].coord, - ) - # Remove the "new" type's name from the end of spec['type'] - del spec["type"][-1] - - # A similar problem can occur where the declaration ends up looking - # like an abstract declarator. Give it a name if this is the case. - # - elif not isinstance( - decls[0]["decl"], (c_ast.Struct, c_ast.Union, c_ast.IdentifierType) - ): - decls_0_tail = decls[0]["decl"] - while not isinstance(decls_0_tail, c_ast.TypeDecl): - decls_0_tail = decls_0_tail.type - if decls_0_tail.declname is None: - decls_0_tail.declname = spec["type"][-1].names[0] - del spec["type"][-1] - - for decl in decls: - assert decl["decl"] is not None - if is_typedef: - declaration = c_ast.Typedef( - name=None, - quals=spec["qual"], - storage=spec["storage"], - type=decl["decl"], - coord=decl["decl"].coord, - ) - else: - declaration = c_ast.Decl( - name=None, - quals=spec["qual"], - storage=spec["storage"], - funcspec=spec["function"], - type=decl["decl"], - init=decl.get("init"), - bitsize=decl.get("bitsize"), - coord=decl["decl"].coord, - ) - - if isinstance( - declaration.type, (c_ast.Struct, c_ast.Union, c_ast.IdentifierType) - ): - fixed_decl = declaration - else: - fixed_decl = self._fix_decl_name_type(declaration, spec["type"]) - - # Add the type name defined by typedef to a - # symbol table (for usage in the lexer) - # - if typedef_namespace: - if is_typedef: - self._add_typedef_name(fixed_decl.name, fixed_decl.coord) - else: - self._add_identifier(fixed_decl.name, fixed_decl.coord) - - declarations.append(fixed_decl) - - return declarations - - def _build_function_definition(self, spec, decl, param_decls, body): - """Builds a function definition.""" - assert "typedef" not in spec["storage"] - - declaration = self._build_declarations( - spec=spec, decls=[dict(decl=decl, init=None)], typedef_namespace=True - )[0] - - return c_ast.FuncDef( - decl=declaration, param_decls=param_decls, body=body, coord=decl.coord - ) - - def _select_struct_union_class(self, token): - """Given a token (either STRUCT or UNION), selects the - appropriate AST class. 
- """ - if token == "struct": - return c_ast.Struct - else: - return c_ast.Union - - ## - ## Precedence and associativity of operators - ## - precedence = ( - ("left", "LOR"), - ("left", "LAND"), - ("left", "OR"), - ("left", "XOR"), - ("left", "AND"), - ("left", "EQ", "NE"), - ("left", "GT", "GE", "LT", "LE"), - ("left", "RSHIFT", "LSHIFT"), - ("left", "PLUS", "MINUS"), - ("left", "TIMES", "DIVIDE", "MOD"), - ) - - ## - ## Grammar productions - ## Implementation of the BNF defined in K&R2 A.13 - ## - - # Wrapper around a translation unit, to allow for empty input. - # Not strictly part of the C99 Grammar, but useful in practice. - # - def p_translation_unit_or_empty(self, p): - """translation_unit_or_empty : translation_unit - | empty - """ - if p[1] is None: - p[0] = c_ast.FileAST([]) - else: - p[0] = c_ast.FileAST(p[1]) - - def p_translation_unit_1(self, p): - """translation_unit : external_declaration""" - # Note: external_declaration is already a list - # - p[0] = p[1] - - def p_translation_unit_2(self, p): - """translation_unit : translation_unit external_declaration""" - if p[2] is not None: - p[1].extend(p[2]) - p[0] = p[1] - - # Declarations always come as lists (because they can be - # several in one line), so we wrap the function definition - # into a list as well, to make the return value of - # external_declaration homogenous. - # - def p_external_declaration_1(self, p): - """external_declaration : function_definition""" - p[0] = [p[1]] - - def p_external_declaration_2(self, p): - """external_declaration : declaration""" - p[0] = p[1] - - def p_external_declaration_3(self, p): - """external_declaration : pp_directive - | pppragma_directive - """ - p[0] = [p[1]] - - def p_external_declaration_4(self, p): - """external_declaration : SEMI""" - p[0] = None - - def p_pp_directive(self, p): - """pp_directive : PPHASH""" - self._parse_error("Directives not supported yet", self._token_coord(p, 1)) - - def p_pppragma_directive(self, p): - """pppragma_directive : PPPRAGMA - | PPPRAGMA PPPRAGMASTR - """ - if len(p) == 3: - p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2)) - else: - p[0] = c_ast.Pragma("", self._token_coord(p, 1)) - - # In function definitions, the declarator can be followed by - # a declaration list, for old "K&R style" function definitios. - # - def p_function_definition_1(self, p): - """function_definition : id_declarator declaration_list_opt compound_statement""" - # no declaration specifiers - 'int' becomes the default type - spec = dict( - qual=[], - storage=[], - type=[c_ast.IdentifierType(["int"], coord=self._token_coord(p, 1))], - function=[], - ) - - p[0] = self._build_function_definition( - spec=spec, decl=p[1], param_decls=p[2], body=p[3] - ) - - def p_function_definition_2(self, p): - """function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement""" - spec = p[1] - - p[0] = self._build_function_definition( - spec=spec, decl=p[2], param_decls=p[3], body=p[4] - ) - - def p_statement(self, p): - """statement : labeled_statement - | expression_statement - | compound_statement - | selection_statement - | iteration_statement - | jump_statement - | pppragma_directive - """ - p[0] = p[1] - - # A pragma is generally considered a decorator rather than an actual statement. - # Still, for the purposes of analyzing an abstract syntax tree of C code, - # pragma's should not be ignored and were previously treated as a statement. 
- # This presents a problem for constructs that take a statement such as labeled_statements, - # selection_statements, and iteration_statements, causing a misleading structure - # in the AST. For example, consider the following C code. - # - # for (int i = 0; i < 3; i++) - # #pragma omp critical - # sum += 1; - # - # This code will compile and execute "sum += 1;" as the body of the for loop. - # Previous implementations of PyCParser would render the AST for this - # block of code as follows: - # - # For: - # DeclList: - # Decl: i, [], [], [] - # TypeDecl: i, [] - # IdentifierType: ['int'] - # Constant: int, 0 - # BinaryOp: < - # ID: i - # Constant: int, 3 - # UnaryOp: p++ - # ID: i - # Pragma: omp critical - # Assignment: += - # ID: sum - # Constant: int, 1 - # - # This AST misleadingly takes the Pragma as the body of the loop and the - # assignment then becomes a sibling of the loop. - # - # To solve edge cases like these, the pragmacomp_or_statement rule groups - # a pragma and its following statement (which would otherwise be orphaned) - # using a compound block, effectively turning the above code into: - # - # for (int i = 0; i < 3; i++) { - # #pragma omp critical - # sum += 1; - # } - def p_pragmacomp_or_statement(self, p): - """pragmacomp_or_statement : pppragma_directive statement - | statement - """ - if isinstance(p[1], c_ast.Pragma) and len(p) == 3: - p[0] = c_ast.Compound( - block_items=[p[1], p[2]], coord=self._token_coord(p, 1) - ) - else: - p[0] = p[1] - - # In C, declarations can come several in a line: - # int x, *px, romulo = 5; - # - # However, for the AST, we will split them to separate Decl - # nodes. - # - # This rule splits its declarations and always returns a list - # of Decl nodes, even if it's one element long. - # - def p_decl_body(self, p): - """decl_body : declaration_specifiers init_declarator_list_opt - | declaration_specifiers_no_type id_init_declarator_list_opt - """ - spec = p[1] - - # p[2] (init_declarator_list_opt) is either a list or None - # - if p[2] is None: - # By the standard, you must have at least one declarator unless - # declaring a structure tag, a union tag, or the members of an - # enumeration. - # - ty = spec["type"] - s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum) - if len(ty) == 1 and isinstance(ty[0], s_u_or_e): - decls = [ - c_ast.Decl( - name=None, - quals=spec["qual"], - storage=spec["storage"], - funcspec=spec["function"], - type=ty[0], - init=None, - bitsize=None, - coord=ty[0].coord, - ) - ] - - # However, this case can also occur on redeclared identifiers in - # an inner scope. The trouble is that the redeclared type's name - # gets grouped into declaration_specifiers; _build_declarations - # compensates for this. - # - else: - decls = self._build_declarations( - spec=spec, - decls=[dict(decl=None, init=None)], - typedef_namespace=True, - ) - - else: - decls = self._build_declarations( - spec=spec, decls=p[2], typedef_namespace=True - ) - - p[0] = decls - - # The declaration has been split to a decl_body sub-rule and - # SEMI, because having them in a single rule created a problem - # for defining typedefs. - # - # If a typedef line was directly followed by a line using the - # type defined with the typedef, the type would not be - # recognized. This is because to reduce the declaration rule, - # the parser's lookahead asked for the token after SEMI, which - # was the type from the next line, and the lexer had no chance - # to see the updated type symbol table. 
- # - # Splitting solves this problem, because after seeing SEMI, - # the parser reduces decl_body, which actually adds the new - # type into the table to be seen by the lexer before the next - # line is reached. - def p_declaration(self, p): - """declaration : decl_body SEMI""" - p[0] = p[1] - - # Since each declaration is a list of declarations, this - # rule will combine all the declarations and return a single - # list - # - def p_declaration_list(self, p): - """declaration_list : declaration - | declaration_list declaration - """ - p[0] = p[1] if len(p) == 2 else p[1] + p[2] - - # To know when declaration-specifiers end and declarators begin, - # we require declaration-specifiers to have at least one - # type-specifier, and disallow typedef-names after we've seen any - # type-specifier. These are both required by the spec. - # - def p_declaration_specifiers_no_type_1(self, p): - """declaration_specifiers_no_type : type_qualifier declaration_specifiers_no_type_opt""" - p[0] = self._add_declaration_specifier(p[2], p[1], "qual") - - def p_declaration_specifiers_no_type_2(self, p): - """declaration_specifiers_no_type : storage_class_specifier declaration_specifiers_no_type_opt""" - p[0] = self._add_declaration_specifier(p[2], p[1], "storage") - - def p_declaration_specifiers_no_type_3(self, p): - """declaration_specifiers_no_type : function_specifier declaration_specifiers_no_type_opt""" - p[0] = self._add_declaration_specifier(p[2], p[1], "function") - - def p_declaration_specifiers_1(self, p): - """declaration_specifiers : declaration_specifiers type_qualifier""" - p[0] = self._add_declaration_specifier(p[1], p[2], "qual", append=True) - - def p_declaration_specifiers_2(self, p): - """declaration_specifiers : declaration_specifiers storage_class_specifier""" - p[0] = self._add_declaration_specifier(p[1], p[2], "storage", append=True) - - def p_declaration_specifiers_3(self, p): - """declaration_specifiers : declaration_specifiers function_specifier""" - p[0] = self._add_declaration_specifier(p[1], p[2], "function", append=True) - - def p_declaration_specifiers_4(self, p): - """declaration_specifiers : declaration_specifiers type_specifier_no_typeid""" - p[0] = self._add_declaration_specifier(p[1], p[2], "type", append=True) - - def p_declaration_specifiers_5(self, p): - """declaration_specifiers : type_specifier""" - p[0] = self._add_declaration_specifier(None, p[1], "type") - - def p_declaration_specifiers_6(self, p): - """declaration_specifiers : declaration_specifiers_no_type type_specifier""" - p[0] = self._add_declaration_specifier(p[1], p[2], "type", append=True) - - def p_storage_class_specifier(self, p): - """storage_class_specifier : AUTO - | REGISTER - | STATIC - | EXTERN - | TYPEDEF - """ - p[0] = p[1] - - def p_function_specifier(self, p): - """function_specifier : INLINE""" - p[0] = p[1] - - def p_type_specifier_no_typeid(self, p): - """type_specifier_no_typeid : VOID - | _BOOL - | CHAR - | SHORT - | INT - | LONG - | FLOAT - | DOUBLE - | _COMPLEX - | SIGNED - | UNSIGNED - | __INT128 - """ - p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1)) - - def p_type_specifier(self, p): - """type_specifier : typedef_name - | enum_specifier - | struct_or_union_specifier - | type_specifier_no_typeid - """ - p[0] = p[1] - - def p_type_qualifier(self, p): - """type_qualifier : CONST - | RESTRICT - | VOLATILE - """ - p[0] = p[1] - - def p_init_declarator_list(self, p): - """init_declarator_list : init_declarator - | init_declarator_list COMMA init_declarator - """ - p[0] = 
p[1] + [p[3]] if len(p) == 4 else [p[1]] - - # Returns a {decl= : init=} dictionary - # If there's no initializer, uses None - # - def p_init_declarator(self, p): - """init_declarator : declarator - | declarator EQUALS initializer - """ - p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None)) - - def p_id_init_declarator_list(self, p): - """id_init_declarator_list : id_init_declarator - | id_init_declarator_list COMMA init_declarator - """ - p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] - - def p_id_init_declarator(self, p): - """id_init_declarator : id_declarator - | id_declarator EQUALS initializer - """ - p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None)) - - # Require at least one type specifier in a specifier-qualifier-list - # - def p_specifier_qualifier_list_1(self, p): - """specifier_qualifier_list : specifier_qualifier_list type_specifier_no_typeid""" - p[0] = self._add_declaration_specifier(p[1], p[2], "type", append=True) - - def p_specifier_qualifier_list_2(self, p): - """specifier_qualifier_list : specifier_qualifier_list type_qualifier""" - p[0] = self._add_declaration_specifier(p[1], p[2], "qual", append=True) - - def p_specifier_qualifier_list_3(self, p): - """specifier_qualifier_list : type_specifier""" - p[0] = self._add_declaration_specifier(None, p[1], "type") - - def p_specifier_qualifier_list_4(self, p): - """specifier_qualifier_list : type_qualifier_list type_specifier""" - spec = dict(qual=p[1], storage=[], type=[], function=[]) - p[0] = self._add_declaration_specifier(spec, p[2], "type", append=True) - - # TYPEID is allowed here (and in other struct/enum related tag names), because - # struct/enum tags reside in their own namespace and can be named the same as types - # - def p_struct_or_union_specifier_1(self, p): - """struct_or_union_specifier : struct_or_union ID - | struct_or_union TYPEID - """ - klass = self._select_struct_union_class(p[1]) - # None means no list of members - p[0] = klass(name=p[2], decls=None, coord=self._token_coord(p, 2)) - - def p_struct_or_union_specifier_2(self, p): - """struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close - | struct_or_union brace_open brace_close - """ - klass = self._select_struct_union_class(p[1]) - if len(p) == 4: - # Empty sequence means an empty list of members - p[0] = klass(name=None, decls=[], coord=self._token_coord(p, 2)) - else: - p[0] = klass(name=None, decls=p[3], coord=self._token_coord(p, 2)) - - def p_struct_or_union_specifier_3(self, p): - """struct_or_union_specifier : struct_or_union ID brace_open struct_declaration_list brace_close - | struct_or_union ID brace_open brace_close - | struct_or_union TYPEID brace_open struct_declaration_list brace_close - | struct_or_union TYPEID brace_open brace_close - """ - klass = self._select_struct_union_class(p[1]) - if len(p) == 5: - # Empty sequence means an empty list of members - p[0] = klass(name=p[2], decls=[], coord=self._token_coord(p, 2)) - else: - p[0] = klass(name=p[2], decls=p[4], coord=self._token_coord(p, 2)) - - def p_struct_or_union(self, p): - """struct_or_union : STRUCT - | UNION - """ - p[0] = p[1] - - # Combine all declarations into a single list - # - def p_struct_declaration_list(self, p): - """struct_declaration_list : struct_declaration - | struct_declaration_list struct_declaration - """ - if len(p) == 2: - p[0] = p[1] or [] - else: - p[0] = p[1] + (p[2] or []) - - def p_struct_declaration_1(self, p): - """struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI""" 
- spec = p[1] - assert "typedef" not in spec["storage"] - - if p[2] is not None: - decls = self._build_declarations(spec=spec, decls=p[2]) - - elif len(spec["type"]) == 1: - # Anonymous struct/union, gcc extension, C1x feature. - # Although the standard only allows structs/unions here, I see no - # reason to disallow other types since some compilers have typedefs - # here, and pycparser isn't about rejecting all invalid code. - # - node = spec["type"][0] - if isinstance(node, c_ast.Node): - decl_type = node - else: - decl_type = c_ast.IdentifierType(node) - - decls = self._build_declarations(spec=spec, decls=[dict(decl=decl_type)]) - - else: - # Structure/union members can have the same names as typedefs. - # The trouble is that the member's name gets grouped into - # specifier_qualifier_list; _build_declarations compensates. - # - decls = self._build_declarations( - spec=spec, decls=[dict(decl=None, init=None)] - ) - - p[0] = decls - - def p_struct_declaration_2(self, p): - """struct_declaration : SEMI""" - p[0] = None - - def p_struct_declaration_3(self, p): - """struct_declaration : pppragma_directive""" - p[0] = [p[1]] - - def p_struct_declarator_list(self, p): - """struct_declarator_list : struct_declarator - | struct_declarator_list COMMA struct_declarator - """ - p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]] - - # struct_declarator passes up a dict with the keys: decl (for - # the underlying declarator) and bitsize (for the bitsize) - # - def p_struct_declarator_1(self, p): - """struct_declarator : declarator""" - p[0] = {"decl": p[1], "bitsize": None} - - def p_struct_declarator_2(self, p): - """struct_declarator : declarator COLON constant_expression - | COLON constant_expression - """ - if len(p) > 3: - p[0] = {"decl": p[1], "bitsize": p[3]} - else: - p[0] = {"decl": c_ast.TypeDecl(None, None, None), "bitsize": p[2]} - - def p_enum_specifier_1(self, p): - """enum_specifier : ENUM ID - | ENUM TYPEID - """ - p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1)) - - def p_enum_specifier_2(self, p): - """enum_specifier : ENUM brace_open enumerator_list brace_close""" - p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1)) - - def p_enum_specifier_3(self, p): - """enum_specifier : ENUM ID brace_open enumerator_list brace_close - | ENUM TYPEID brace_open enumerator_list brace_close - """ - p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1)) - - def p_enumerator_list(self, p): - """enumerator_list : enumerator - | enumerator_list COMMA - | enumerator_list COMMA enumerator - """ - if len(p) == 2: - p[0] = c_ast.EnumeratorList([p[1]], p[1].coord) - elif len(p) == 3: - p[0] = p[1] - else: - p[1].enumerators.append(p[3]) - p[0] = p[1] - - def p_enumerator(self, p): - """enumerator : ID - | ID EQUALS constant_expression - """ - if len(p) == 2: - enumerator = c_ast.Enumerator(p[1], None, self._token_coord(p, 1)) - else: - enumerator = c_ast.Enumerator(p[1], p[3], self._token_coord(p, 1)) - self._add_identifier(enumerator.name, enumerator.coord) - - p[0] = enumerator - - def p_declarator(self, p): - """declarator : id_declarator - | typeid_declarator - """ - p[0] = p[1] - - @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_xxx_declarator_1(self, p): - """xxx_declarator : direct_xxx_declarator""" - p[0] = p[1] - - @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_xxx_declarator_2(self, p): - """xxx_declarator : pointer direct_xxx_declarator""" - p[0] = self._type_modify_decl(p[2], p[1]) - - 
@parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_direct_xxx_declarator_1(self, p): - """direct_xxx_declarator : yyy""" - p[0] = c_ast.TypeDecl( - declname=p[1], type=None, quals=None, coord=self._token_coord(p, 1) - ) - - @parameterized(("id", "ID"), ("typeid", "TYPEID")) - def p_direct_xxx_declarator_2(self, p): - """direct_xxx_declarator : LPAREN xxx_declarator RPAREN""" - p[0] = p[2] - - @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_direct_xxx_declarator_3(self, p): - """direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET""" - quals = (p[3] if len(p) > 5 else []) or [] - # Accept dimension qualifiers - # Per C99 6.7.5.3 p7 - arr = c_ast.ArrayDecl( - type=None, - dim=p[4] if len(p) > 5 else p[3], - dim_quals=quals, - coord=p[1].coord, - ) - - p[0] = self._type_modify_decl(decl=p[1], modifier=arr) - - @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_direct_xxx_declarator_4(self, p): - """direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET - | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET - """ - # Using slice notation for PLY objects doesn't work in Python 3 for the - # version of PLY embedded with pycparser; see PLY Google Code issue 30. - # Work around that here by listing the two elements separately. - listed_quals = [ - item if isinstance(item, list) else [item] for item in [p[3], p[4]] - ] - dim_quals = [ - qual for sublist in listed_quals for qual in sublist if qual is not None - ] - arr = c_ast.ArrayDecl( - type=None, dim=p[5], dim_quals=dim_quals, coord=p[1].coord - ) - - p[0] = self._type_modify_decl(decl=p[1], modifier=arr) - - # Special for VLAs - # - @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_direct_xxx_declarator_5(self, p): - """direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET""" - arr = c_ast.ArrayDecl( - type=None, - dim=c_ast.ID(p[4], self._token_coord(p, 4)), - dim_quals=p[3] if p[3] != None else [], - coord=p[1].coord, - ) - - p[0] = self._type_modify_decl(decl=p[1], modifier=arr) - - @parameterized(("id", "ID"), ("typeid", "TYPEID"), ("typeid_noparen", "TYPEID")) - def p_direct_xxx_declarator_6(self, p): - """direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN - | direct_xxx_declarator LPAREN identifier_list_opt RPAREN - """ - func = c_ast.FuncDecl(args=p[3], type=None, coord=p[1].coord) - - # To see why _get_yacc_lookahead_token is needed, consider: - # typedef char TT; - # void foo(int TT) { TT = 10; } - # Outside the function, TT is a typedef, but inside (starting and - # ending with the braces) it's a parameter. The trouble begins with - # yacc's lookahead token. We don't know if we're declaring or - # defining a function until we see LBRACE, but if we wait for yacc to - # trigger a rule on that token, then TT will have already been read - # and incorrectly interpreted as TYPEID. We need to add the - # parameters to the scope the moment the lexer sees LBRACE. 
- # - if self._get_yacc_lookahead_token().type == "LBRACE": - if func.args is not None: - for param in func.args.params: - if isinstance(param, c_ast.EllipsisParam): - break - self._add_identifier(param.name, param.coord) - - p[0] = self._type_modify_decl(decl=p[1], modifier=func) - - def p_pointer(self, p): - """pointer : TIMES type_qualifier_list_opt - | TIMES type_qualifier_list_opt pointer - """ - coord = self._token_coord(p, 1) - # Pointer decls nest from inside out. This is important when different - # levels have different qualifiers. For example: - # - # char * const * p; - # - # Means "pointer to const pointer to char" - # - # While: - # - # char ** const p; - # - # Means "const pointer to pointer to char" - # - # So when we construct PtrDecl nestings, the leftmost pointer goes in - # as the most nested type. - nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord) - if len(p) > 3: - tail_type = p[3] - while tail_type.type is not None: - tail_type = tail_type.type - tail_type.type = nested_type - p[0] = p[3] - else: - p[0] = nested_type - - def p_type_qualifier_list(self, p): - """type_qualifier_list : type_qualifier - | type_qualifier_list type_qualifier - """ - p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] - - def p_parameter_type_list(self, p): - """parameter_type_list : parameter_list - | parameter_list COMMA ELLIPSIS - """ - if len(p) > 2: - p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3))) - - p[0] = p[1] - - def p_parameter_list(self, p): - """parameter_list : parameter_declaration - | parameter_list COMMA parameter_declaration - """ - if len(p) == 2: # single parameter - p[0] = c_ast.ParamList([p[1]], p[1].coord) - else: - p[1].params.append(p[3]) - p[0] = p[1] - - # From ISO/IEC 9899:TC2, 6.7.5.3.11: - # "If, in a parameter declaration, an identifier can be treated either - # as a typedef name or as a parameter name, it shall be taken as a - # typedef name." - # - # Inside a parameter declaration, once we've reduced declaration specifiers, - # if we shift in an LPAREN and see a TYPEID, it could be either an abstract - # declarator or a declarator nested inside parens. This rule tells us to - # always treat it as an abstract declarator. Therefore, we only accept - # `id_declarator`s and `typeid_noparen_declarator`s. - def p_parameter_declaration_1(self, p): - """parameter_declaration : declaration_specifiers id_declarator - | declaration_specifiers typeid_noparen_declarator - """ - spec = p[1] - if not spec["type"]: - spec["type"] = [ - c_ast.IdentifierType(["int"], coord=self._token_coord(p, 1)) - ] - p[0] = self._build_declarations(spec=spec, decls=[dict(decl=p[2])])[0] - - def p_parameter_declaration_2(self, p): - """parameter_declaration : declaration_specifiers abstract_declarator_opt""" - spec = p[1] - if not spec["type"]: - spec["type"] = [ - c_ast.IdentifierType(["int"], coord=self._token_coord(p, 1)) - ] - - # Parameters can have the same names as typedefs. The trouble is that - # the parameter's name gets grouped into declaration_specifiers, making - # it look like an old-style declaration; compensate. 
- # - if ( - len(spec["type"]) > 1 - and len(spec["type"][-1].names) == 1 - and self._is_type_in_scope(spec["type"][-1].names[0]) - ): - decl = self._build_declarations( - spec=spec, decls=[dict(decl=p[2], init=None)] - )[0] - - # This truly is an old-style parameter declaration - # - else: - decl = c_ast.Typename( - name="", - quals=spec["qual"], - type=p[2] or c_ast.TypeDecl(None, None, None), - coord=self._token_coord(p, 2), - ) - typename = spec["type"] - decl = self._fix_decl_name_type(decl, typename) - - p[0] = decl - - def p_identifier_list(self, p): - """identifier_list : identifier - | identifier_list COMMA identifier - """ - if len(p) == 2: # single parameter - p[0] = c_ast.ParamList([p[1]], p[1].coord) - else: - p[1].params.append(p[3]) - p[0] = p[1] - - def p_initializer_1(self, p): - """initializer : assignment_expression""" - p[0] = p[1] - - def p_initializer_2(self, p): - """initializer : brace_open initializer_list_opt brace_close - | brace_open initializer_list COMMA brace_close - """ - if p[2] is None: - p[0] = c_ast.InitList([], self._token_coord(p, 1)) - else: - p[0] = p[2] - - def p_initializer_list(self, p): - """initializer_list : designation_opt initializer - | initializer_list COMMA designation_opt initializer - """ - if len(p) == 3: # single initializer - init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2]) - p[0] = c_ast.InitList([init], p[2].coord) - else: - init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4]) - p[1].exprs.append(init) - p[0] = p[1] - - def p_designation(self, p): - """designation : designator_list EQUALS""" - p[0] = p[1] - - # Designators are represented as a list of nodes, in the order in which - # they're written in the code. - # - def p_designator_list(self, p): - """designator_list : designator - | designator_list designator - """ - p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] - - def p_designator(self, p): - """designator : LBRACKET constant_expression RBRACKET - | PERIOD identifier - """ - p[0] = p[2] - - def p_type_name(self, p): - """type_name : specifier_qualifier_list abstract_declarator_opt""" - typename = c_ast.Typename( - name="", - quals=p[1]["qual"], - type=p[2] or c_ast.TypeDecl(None, None, None), - coord=self._token_coord(p, 2), - ) - - p[0] = self._fix_decl_name_type(typename, p[1]["type"]) - - def p_abstract_declarator_1(self, p): - """abstract_declarator : pointer""" - dummytype = c_ast.TypeDecl(None, None, None) - p[0] = self._type_modify_decl(decl=dummytype, modifier=p[1]) - - def p_abstract_declarator_2(self, p): - """abstract_declarator : pointer direct_abstract_declarator""" - p[0] = self._type_modify_decl(p[2], p[1]) - - def p_abstract_declarator_3(self, p): - """abstract_declarator : direct_abstract_declarator""" - p[0] = p[1] - - # Creating and using direct_abstract_declarator_opt here - # instead of listing both direct_abstract_declarator and the - # lack of it in the beginning of _1 and _2 caused two - # shift/reduce errors. 
- # - def p_direct_abstract_declarator_1(self, p): - """direct_abstract_declarator : LPAREN abstract_declarator RPAREN""" - p[0] = p[2] - - def p_direct_abstract_declarator_2(self, p): - """direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET""" - arr = c_ast.ArrayDecl(type=None, dim=p[3], dim_quals=[], coord=p[1].coord) - - p[0] = self._type_modify_decl(decl=p[1], modifier=arr) - - def p_direct_abstract_declarator_3(self, p): - """direct_abstract_declarator : LBRACKET assignment_expression_opt RBRACKET""" - p[0] = c_ast.ArrayDecl( - type=c_ast.TypeDecl(None, None, None), - dim=p[2], - dim_quals=[], - coord=self._token_coord(p, 1), - ) - - def p_direct_abstract_declarator_4(self, p): - """direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET""" - arr = c_ast.ArrayDecl( - type=None, - dim=c_ast.ID(p[3], self._token_coord(p, 3)), - dim_quals=[], - coord=p[1].coord, - ) - - p[0] = self._type_modify_decl(decl=p[1], modifier=arr) - - def p_direct_abstract_declarator_5(self, p): - """direct_abstract_declarator : LBRACKET TIMES RBRACKET""" - p[0] = c_ast.ArrayDecl( - type=c_ast.TypeDecl(None, None, None), - dim=c_ast.ID(p[3], self._token_coord(p, 3)), - dim_quals=[], - coord=self._token_coord(p, 1), - ) - - def p_direct_abstract_declarator_6(self, p): - """direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN""" - func = c_ast.FuncDecl(args=p[3], type=None, coord=p[1].coord) - - p[0] = self._type_modify_decl(decl=p[1], modifier=func) - - def p_direct_abstract_declarator_7(self, p): - """direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN""" - p[0] = c_ast.FuncDecl( - args=p[2], - type=c_ast.TypeDecl(None, None, None), - coord=self._token_coord(p, 1), - ) - - # declaration is a list, statement isn't. 
To make it consistent, block_item - # will always be a list - # - def p_block_item(self, p): - """block_item : declaration - | statement - """ - p[0] = p[1] if isinstance(p[1], list) else [p[1]] - - # Since we made block_item a list, this just combines lists - # - def p_block_item_list(self, p): - """block_item_list : block_item - | block_item_list block_item - """ - # Empty block items (plain ';') produce [None], so ignore them - p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2] - - def p_compound_statement_1(self, p): - """compound_statement : brace_open block_item_list_opt brace_close""" - p[0] = c_ast.Compound(block_items=p[2], coord=self._token_coord(p, 1)) - - def p_labeled_statement_1(self, p): - """labeled_statement : ID COLON pragmacomp_or_statement""" - p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1)) - - def p_labeled_statement_2(self, p): - """labeled_statement : CASE constant_expression COLON pragmacomp_or_statement""" - p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1)) - - def p_labeled_statement_3(self, p): - """labeled_statement : DEFAULT COLON pragmacomp_or_statement""" - p[0] = c_ast.Default([p[3]], self._token_coord(p, 1)) - - def p_selection_statement_1(self, p): - """selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement""" - p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1)) - - def p_selection_statement_2(self, p): - """selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement""" - p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1)) - - def p_selection_statement_3(self, p): - """selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement""" - p[0] = fix_switch_cases(c_ast.Switch(p[3], p[5], self._token_coord(p, 1))) - - def p_iteration_statement_1(self, p): - """iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement""" - p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1)) - - def p_iteration_statement_2(self, p): - """iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI""" - p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1)) - - def p_iteration_statement_3(self, p): - """iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement""" - p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1)) - - def p_iteration_statement_4(self, p): - """iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement""" - p[0] = c_ast.For( - c_ast.DeclList(p[3], self._token_coord(p, 1)), - p[4], - p[6], - p[8], - self._token_coord(p, 1), - ) - - def p_jump_statement_1(self, p): - """jump_statement : GOTO ID SEMI""" - p[0] = c_ast.Goto(p[2], self._token_coord(p, 1)) - - def p_jump_statement_2(self, p): - """jump_statement : BREAK SEMI""" - p[0] = c_ast.Break(self._token_coord(p, 1)) - - def p_jump_statement_3(self, p): - """jump_statement : CONTINUE SEMI""" - p[0] = c_ast.Continue(self._token_coord(p, 1)) - - def p_jump_statement_4(self, p): - """jump_statement : RETURN expression SEMI - | RETURN SEMI - """ - p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1)) - - def p_expression_statement(self, p): - """expression_statement : expression_opt SEMI""" - if p[1] is None: - p[0] = c_ast.EmptyStatement(self._token_coord(p, 2)) - else: - p[0] = p[1] - - def p_expression(self, p): - """expression : assignment_expression - | expression COMMA assignment_expression - """ - if len(p) == 
2: - p[0] = p[1] - else: - if not isinstance(p[1], c_ast.ExprList): - p[1] = c_ast.ExprList([p[1]], p[1].coord) - - p[1].exprs.append(p[3]) - p[0] = p[1] - - def p_typedef_name(self, p): - """typedef_name : TYPEID""" - p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1)) - - def p_assignment_expression(self, p): - """assignment_expression : conditional_expression - | unary_expression assignment_operator assignment_expression - """ - if len(p) == 2: - p[0] = p[1] - else: - p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord) - - # K&R2 defines these as many separate rules, to encode - # precedence and associativity. Why work hard ? I'll just use - # the built in precedence/associativity specification feature - # of PLY. (see precedence declaration above) - # - def p_assignment_operator(self, p): - """assignment_operator : EQUALS - | XOREQUAL - | TIMESEQUAL - | DIVEQUAL - | MODEQUAL - | PLUSEQUAL - | MINUSEQUAL - | LSHIFTEQUAL - | RSHIFTEQUAL - | ANDEQUAL - | OREQUAL - """ - p[0] = p[1] - - def p_constant_expression(self, p): - """constant_expression : conditional_expression""" - p[0] = p[1] - - def p_conditional_expression(self, p): - """conditional_expression : binary_expression - | binary_expression CONDOP expression COLON conditional_expression - """ - if len(p) == 2: - p[0] = p[1] - else: - p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord) - - def p_binary_expression(self, p): - """binary_expression : cast_expression - | binary_expression TIMES binary_expression - | binary_expression DIVIDE binary_expression - | binary_expression MOD binary_expression - | binary_expression PLUS binary_expression - | binary_expression MINUS binary_expression - | binary_expression RSHIFT binary_expression - | binary_expression LSHIFT binary_expression - | binary_expression LT binary_expression - | binary_expression LE binary_expression - | binary_expression GE binary_expression - | binary_expression GT binary_expression - | binary_expression EQ binary_expression - | binary_expression NE binary_expression - | binary_expression AND binary_expression - | binary_expression OR binary_expression - | binary_expression XOR binary_expression - | binary_expression LAND binary_expression - | binary_expression LOR binary_expression - """ - if len(p) == 2: - p[0] = p[1] - else: - p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord) - - def p_cast_expression_1(self, p): - """cast_expression : unary_expression""" - p[0] = p[1] - - def p_cast_expression_2(self, p): - """cast_expression : LPAREN type_name RPAREN cast_expression""" - p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1)) - - def p_unary_expression_1(self, p): - """unary_expression : postfix_expression""" - p[0] = p[1] - - def p_unary_expression_2(self, p): - """unary_expression : PLUSPLUS unary_expression - | MINUSMINUS unary_expression - | unary_operator cast_expression - """ - p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord) - - def p_unary_expression_3(self, p): - """unary_expression : SIZEOF unary_expression - | SIZEOF LPAREN type_name RPAREN - """ - p[0] = c_ast.UnaryOp( - p[1], p[2] if len(p) == 3 else p[3], self._token_coord(p, 1) - ) - - def p_unary_operator(self, p): - """unary_operator : AND - | TIMES - | PLUS - | MINUS - | NOT - | LNOT - """ - p[0] = p[1] - - def p_postfix_expression_1(self, p): - """postfix_expression : primary_expression""" - p[0] = p[1] - - def p_postfix_expression_2(self, p): - """postfix_expression : postfix_expression LBRACKET expression RBRACKET""" - p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) - - def 
p_postfix_expression_3(self, p): - """postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN - | postfix_expression LPAREN RPAREN - """ - p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord) - - def p_postfix_expression_4(self, p): - """postfix_expression : postfix_expression PERIOD ID - | postfix_expression PERIOD TYPEID - | postfix_expression ARROW ID - | postfix_expression ARROW TYPEID - """ - field = c_ast.ID(p[3], self._token_coord(p, 3)) - p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) - - def p_postfix_expression_5(self, p): - """postfix_expression : postfix_expression PLUSPLUS - | postfix_expression MINUSMINUS - """ - p[0] = c_ast.UnaryOp("p" + p[2], p[1], p[1].coord) - - def p_postfix_expression_6(self, p): - """postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close - | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close - """ - p[0] = c_ast.CompoundLiteral(p[2], p[5]) - - def p_primary_expression_1(self, p): - """primary_expression : identifier""" - p[0] = p[1] - - def p_primary_expression_2(self, p): - """primary_expression : constant""" - p[0] = p[1] - - def p_primary_expression_3(self, p): - """primary_expression : unified_string_literal - | unified_wstring_literal - """ - p[0] = p[1] - - def p_primary_expression_4(self, p): - """primary_expression : LPAREN expression RPAREN""" - p[0] = p[2] - - def p_primary_expression_5(self, p): - """primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN""" - coord = self._token_coord(p, 1) - p[0] = c_ast.FuncCall( - c_ast.ID(p[1], coord), c_ast.ExprList([p[3], p[5]], coord), coord - ) - - def p_offsetof_member_designator(self, p): - """offsetof_member_designator : identifier - | offsetof_member_designator PERIOD identifier - | offsetof_member_designator LBRACKET expression RBRACKET - """ - if len(p) == 2: - p[0] = p[1] - elif len(p) == 4: - field = c_ast.ID(p[3], self._token_coord(p, 3)) - p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) - elif len(p) == 5: - p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) - else: - raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p)) - - def p_argument_expression_list(self, p): - """argument_expression_list : assignment_expression - | argument_expression_list COMMA assignment_expression - """ - if len(p) == 2: # single expr - p[0] = c_ast.ExprList([p[1]], p[1].coord) - else: - p[1].exprs.append(p[3]) - p[0] = p[1] - - def p_identifier(self, p): - """identifier : ID""" - p[0] = c_ast.ID(p[1], self._token_coord(p, 1)) - - def p_constant_1(self, p): - """constant : INT_CONST_DEC - | INT_CONST_OCT - | INT_CONST_HEX - | INT_CONST_BIN - """ - p[0] = c_ast.Constant("int", p[1], self._token_coord(p, 1)) - - def p_constant_2(self, p): - """constant : FLOAT_CONST - | HEX_FLOAT_CONST - """ - p[0] = c_ast.Constant("float", p[1], self._token_coord(p, 1)) - - def p_constant_3(self, p): - """constant : CHAR_CONST - | WCHAR_CONST - """ - p[0] = c_ast.Constant("char", p[1], self._token_coord(p, 1)) - - # The "unified" string and wstring literal rules are for supporting - # concatenation of adjacent string literals. - # I.e. 
"hello " "world" is seen by the C compiler as a single string literal - # with the value "hello world" - # - def p_unified_string_literal(self, p): - """unified_string_literal : STRING_LITERAL - | unified_string_literal STRING_LITERAL - """ - if len(p) == 2: # single literal - p[0] = c_ast.Constant("string", p[1], self._token_coord(p, 1)) - else: - p[1].value = p[1].value[:-1] + p[2][1:] - p[0] = p[1] - - def p_unified_wstring_literal(self, p): - """unified_wstring_literal : WSTRING_LITERAL - | unified_wstring_literal WSTRING_LITERAL - """ - if len(p) == 2: # single literal - p[0] = c_ast.Constant("string", p[1], self._token_coord(p, 1)) - else: - p[1].value = p[1].value.rstrip()[:-1] + p[2][2:] - p[0] = p[1] - - def p_brace_open(self, p): - """brace_open : LBRACE""" - p[0] = p[1] - p.set_lineno(0, p.lineno(1)) - - def p_brace_close(self, p): - """brace_close : RBRACE""" - p[0] = p[1] - p.set_lineno(0, p.lineno(1)) - - def p_empty(self, p): - "empty :" - p[0] = None - - def p_error(self, p): - # If error recovery is added here in the future, make sure - # _get_yacc_lookahead_token still works! - # - if p: - self._parse_error( - "before: %s" % p.value, - self._coord(lineno=p.lineno, column=self.clex.find_tok_column(p)), - ) - else: - self._parse_error("At end of input", self.clex.filename) diff --git a/src/pycparser/c_parser.pyc b/src/pycparser/c_parser.pyc deleted file mode 100644 index de833128..00000000 Binary files a/src/pycparser/c_parser.pyc and /dev/null differ diff --git a/src/pycparser/ply/LICENSE b/src/pycparser/ply/LICENSE deleted file mode 100755 index bac0d9a5..00000000 --- a/src/pycparser/ply/LICENSE +++ /dev/null @@ -1,34 +0,0 @@ -PLY (Python Lex-Yacc) Version 3.10 - -Copyright (C) 2001-2017 -David M. Beazley (Dabeaz LLC) -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -* Neither the name of the David Beazley or Dabeaz LLC may be used to - endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply diff --git a/src/pycparser/ply/__init__.py b/src/pycparser/ply/__init__.py deleted file mode 100755 index 7039d1c5..00000000 --- a/src/pycparser/ply/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# PLY package -# Author: David Beazley (dave@dabeaz.com) - -__version__ = "3.9" -__all__ = ["lex", "yacc"] diff --git a/src/pycparser/ply/__init__.pyc b/src/pycparser/ply/__init__.pyc deleted file mode 100644 index f1d7ae9a..00000000 Binary files a/src/pycparser/ply/__init__.pyc and /dev/null differ diff --git a/src/pycparser/ply/cpp.py b/src/pycparser/ply/cpp.py deleted file mode 100755 index 823f2d25..00000000 --- a/src/pycparser/ply/cpp.py +++ /dev/null @@ -1,965 +0,0 @@ -# ----------------------------------------------------------------------------- -# cpp.py -# -# Author: David Beazley (http://www.dabeaz.com) -# Copyright (C) 2017 -# All rights reserved -# -# This module implements an ANSI-C style lexical preprocessor for PLY. -# ----------------------------------------------------------------------------- -import copy -import os.path -import re -import sys -import time - -# Some Python 3 compatibility shims -if sys.version_info.major < 3: - STRING_TYPES = (str, unicode) -else: - STRING_TYPES = str - xrange = range - -# ----------------------------------------------------------------------------- -# Default preprocessor lexer definitions. These tokens are enough to get -# a basic preprocessor working. Other modules may import these if they want -# ----------------------------------------------------------------------------- - -tokens = ( - "CPP_ID", - "CPP_INTEGER", - "CPP_FLOAT", - "CPP_STRING", - "CPP_CHAR", - "CPP_WS", - "CPP_COMMENT1", - "CPP_COMMENT2", - "CPP_POUND", - "CPP_DPOUND", -) - -literals = "+-*/%|&~^<>=!?()[]{}.,;:\\'\"" - -# Whitespace -def t_CPP_WS(t): - r"\s+" - t.lexer.lineno += t.value.count("\n") - return t - - -t_CPP_POUND = r"\#" -t_CPP_DPOUND = r"\#\#" - -# Identifier -t_CPP_ID = r"[A-Za-z_][\w_]*" - -# Floating literal -t_CPP_FLOAT = r"((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?" - -# String literal -def t_CPP_STRING(t): - r"\"([^\\\n]|(\\(.|\n)))*?\" " - t.lexer.lineno += t.value.count("\n") - return t - - -# Character constant 'c' or L'c' -def t_CPP_CHAR(t): - t.lexer.lineno += t.value.count("\n") - return t - - -# Comment -def t_CPP_COMMENT1(t): - ncr = t.value.count("\n") - t.lexer.lineno += ncr - # replace with one space or a number of '\n' - t.type = "CPP_WS" - t.value = "\n" * ncr if ncr else " " - return t - - -# Line comment -def t_CPP_COMMENT2(t): - t.type = "CPP_WS" - t.value = "\n" - return t - - -def t_error(t): - t.type = t.value[0] - t.value = t.value[0] - t.lexer.skip(1) - return t - - -# ----------------------------------------------------------------------------- -# trigraph() -# -# Given an input string, this function replaces all trigraph sequences. -# The following mapping is used: -# -# ??= # -# ??/ \ -# ??' ^ -# ??( [ -# ??) ] -# ??! 
| -# ??< { -# ??> } -# ??- ~ -# ----------------------------------------------------------------------------- - -_trigraph_pat = re.compile(r"""\?\?[=/\'\(\)\!<>\-]""") -_trigraph_rep = { - "=": "#", - "/": "\\", - "'": "^", - "(": "[", - ")": "]", - "!": "|", - "<": "{", - ">": "}", - "-": "~", -} - - -def trigraph(input): - return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]], input) - - -# ------------------------------------------------------------------ -# Macro object -# -# This object holds information about preprocessor macros -# -# .name - Macro name (string) -# .value - Macro value (a list of tokens) -# .arglist - List of argument names -# .variadic - Boolean indicating whether or not variadic macro -# .vararg - Name of the variadic parameter -# -# When a macro is created, the macro replacement token sequence is -# pre-scanned and used to create patch lists that are later used -# during macro expansion -# ------------------------------------------------------------------ - - -class Macro(object): - def __init__(self, name, value, arglist=None, variadic=False): - self.name = name - self.value = value - self.arglist = arglist - self.variadic = variadic - if variadic: - self.vararg = arglist[-1] - self.source = None - - -# ------------------------------------------------------------------ -# Preprocessor object -# -# Object representing a preprocessor. Contains macro definitions, -# include directories, and other information -# ------------------------------------------------------------------ - - -class Preprocessor(object): - def __init__(self, lexer=None): - if lexer is None: - lexer = lex.lexer - self.lexer = lexer - self.macros = {} - self.path = [] - self.temp_path = [] - - # Probe the lexer for selected tokens - self.lexprobe() - - tm = time.localtime() - self.define('__DATE__ "%s"' % time.strftime("%b %d %Y", tm)) - self.define('__TIME__ "%s"' % time.strftime("%H:%M:%S", tm)) - self.parser = None - - # ----------------------------------------------------------------------------- - # tokenize() - # - # Utility function. Given a string of text, tokenize into a list of tokens - # ----------------------------------------------------------------------------- - - def tokenize(self, text): - tokens = [] - self.lexer.input(text) - while True: - tok = self.lexer.token() - if not tok: - break - tokens.append(tok) - return tokens - - # --------------------------------------------------------------------- - # error() - # - # Report a preprocessor error/warning of some kind - # ---------------------------------------------------------------------- - - def error(self, file, line, msg): - print("%s:%d %s" % (file, line, msg)) - - # ---------------------------------------------------------------------- - # lexprobe() - # - # This method probes the preprocessor lexer object to discover - # the token types of symbols that are important to the preprocessor. - # If this works right, the preprocessor will simply "work" - # with any suitable lexer regardless of how tokens have been named. 
- # ---------------------------------------------------------------------- - - def lexprobe(self): - - # Determine the token type for identifiers - self.lexer.input("identifier") - tok = self.lexer.token() - if not tok or tok.value != "identifier": - print("Couldn't determine identifier type") - else: - self.t_ID = tok.type - - # Determine the token type for integers - self.lexer.input("12345") - tok = self.lexer.token() - if not tok or int(tok.value) != 12345: - print("Couldn't determine integer type") - else: - self.t_INTEGER = tok.type - self.t_INTEGER_TYPE = type(tok.value) - - # Determine the token type for strings enclosed in double quotes - self.lexer.input('"filename"') - tok = self.lexer.token() - if not tok or tok.value != '"filename"': - print("Couldn't determine string type") - else: - self.t_STRING = tok.type - - # Determine the token type for whitespace--if any - self.lexer.input(" ") - tok = self.lexer.token() - if not tok or tok.value != " ": - self.t_SPACE = None - else: - self.t_SPACE = tok.type - - # Determine the token type for newlines - self.lexer.input("\n") - tok = self.lexer.token() - if not tok or tok.value != "\n": - self.t_NEWLINE = None - print("Couldn't determine token for newlines") - else: - self.t_NEWLINE = tok.type - - self.t_WS = (self.t_SPACE, self.t_NEWLINE) - - # Check for other characters used by the preprocessor - chars = ["<", ">", "#", "##", "\\", "(", ")", ",", "."] - for c in chars: - self.lexer.input(c) - tok = self.lexer.token() - if not tok or tok.value != c: - print("Unable to lex '%s' required for preprocessor" % c) - - # ---------------------------------------------------------------------- - # add_path() - # - # Adds a search path to the preprocessor. - # ---------------------------------------------------------------------- - - def add_path(self, path): - self.path.append(path) - - # ---------------------------------------------------------------------- - # group_lines() - # - # Given an input string, this function splits it into lines. Trailing whitespace - # is removed. Any line ending with \ is grouped with the next line. This - # function forms the lowest level of the preprocessor---grouping into text into - # a line-by-line format. - # ---------------------------------------------------------------------- - - def group_lines(self, input): - lex = self.lexer.clone() - lines = [x.rstrip() for x in input.splitlines()] - for i in xrange(len(lines)): - j = i + 1 - while lines[i].endswith("\\") and (j < len(lines)): - lines[i] = lines[i][:-1] + lines[j] - lines[j] = "" - j += 1 - - input = "\n".join(lines) - lex.input(input) - lex.lineno = 1 - - current_line = [] - while True: - tok = lex.token() - if not tok: - break - current_line.append(tok) - if tok.type in self.t_WS and "\n" in tok.value: - yield current_line - current_line = [] - - if current_line: - yield current_line - - # ---------------------------------------------------------------------- - # tokenstrip() - # - # Remove leading/trailing whitespace tokens from a token list - # ---------------------------------------------------------------------- - - def tokenstrip(self, tokens): - i = 0 - while i < len(tokens) and tokens[i].type in self.t_WS: - i += 1 - del tokens[:i] - i = len(tokens) - 1 - while i >= 0 and tokens[i].type in self.t_WS: - i -= 1 - del tokens[i + 1 :] - return tokens - - # ---------------------------------------------------------------------- - # collect_args() - # - # Collects comma separated arguments from a list of tokens. 
The arguments - # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions) - # where tokencount is the number of tokens consumed, args is a list of arguments, - # and positions is a list of integers containing the starting index of each - # argument. Each argument is represented by a list of tokens. - # - # When collecting arguments, leading and trailing whitespace is removed - # from each argument. - # - # This function properly handles nested parenthesis and commas---these do not - # define new arguments. - # ---------------------------------------------------------------------- - - def collect_args(self, tokenlist): - args = [] - positions = [] - current_arg = [] - nesting = 1 - tokenlen = len(tokenlist) - - # Search for the opening '('. - i = 0 - while (i < tokenlen) and (tokenlist[i].type in self.t_WS): - i += 1 - - if (i < tokenlen) and (tokenlist[i].value == "("): - positions.append(i + 1) - else: - self.error( - self.source, tokenlist[0].lineno, "Missing '(' in macro arguments" - ) - return 0, [], [] - - i += 1 - - while i < tokenlen: - t = tokenlist[i] - if t.value == "(": - current_arg.append(t) - nesting += 1 - elif t.value == ")": - nesting -= 1 - if nesting == 0: - if current_arg: - args.append(self.tokenstrip(current_arg)) - positions.append(i) - return i + 1, args, positions - current_arg.append(t) - elif t.value == "," and nesting == 1: - args.append(self.tokenstrip(current_arg)) - positions.append(i + 1) - current_arg = [] - else: - current_arg.append(t) - i += 1 - - # Missing end argument - self.error(self.source, tokenlist[-1].lineno, "Missing ')' in macro arguments") - return 0, [], [] - - # ---------------------------------------------------------------------- - # macro_prescan() - # - # Examine the macro value (token sequence) and identify patch points - # This is used to speed up macro expansion later on---we'll know - # right away where to apply patches to the value to form the expansion - # ---------------------------------------------------------------------- - - def macro_prescan(self, macro): - macro.patch = [] # Standard macro arguments - macro.str_patch = [] # String conversion expansion - macro.var_comma_patch = [] # Variadic macro comma patch - i = 0 - while i < len(macro.value): - if ( - macro.value[i].type == self.t_ID - and macro.value[i].value in macro.arglist - ): - argnum = macro.arglist.index(macro.value[i].value) - # Conversion of argument to a string - if i > 0 and macro.value[i - 1].value == "#": - macro.value[i] = copy.copy(macro.value[i]) - macro.value[i].type = self.t_STRING - del macro.value[i - 1] - macro.str_patch.append((argnum, i - 1)) - continue - # Concatenation - elif i > 0 and macro.value[i - 1].value == "##": - macro.patch.append(("c", argnum, i - 1)) - del macro.value[i - 1] - continue - elif (i + 1) < len(macro.value) and macro.value[i + 1].value == "##": - macro.patch.append(("c", argnum, i)) - i += 1 - continue - # Standard expansion - else: - macro.patch.append(("e", argnum, i)) - elif macro.value[i].value == "##": - if ( - macro.variadic - and (i > 0) - and (macro.value[i - 1].value == ",") - and ((i + 1) < len(macro.value)) - and (macro.value[i + 1].type == self.t_ID) - and (macro.value[i + 1].value == macro.vararg) - ): - macro.var_comma_patch.append(i - 1) - i += 1 - macro.patch.sort(key=lambda x: x[2], reverse=True) - - # ---------------------------------------------------------------------- - # macro_expand_args() - # - # Given a Macro and list of arguments (each a token list), this method - # returns an 
expanded version of a macro. The return value is a token sequence - # representing the replacement macro tokens - # ---------------------------------------------------------------------- - - def macro_expand_args(self, macro, args): - # Make a copy of the macro token sequence - rep = [copy.copy(_x) for _x in macro.value] - - # Make string expansion patches. These do not alter the length of the replacement sequence - - str_expansion = {} - for argnum, i in macro.str_patch: - if argnum not in str_expansion: - str_expansion[argnum] = ( - '"%s"' % "".join([x.value for x in args[argnum]]) - ).replace("\\", "\\\\") - rep[i] = copy.copy(rep[i]) - rep[i].value = str_expansion[argnum] - - # Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid - comma_patch = False - if macro.variadic and not args[-1]: - for i in macro.var_comma_patch: - rep[i] = None - comma_patch = True - - # Make all other patches. The order of these matters. It is assumed that the patch list - # has been sorted in reverse order of patch location since replacements will cause the - # size of the replacement sequence to expand from the patch point. - - expanded = {} - for ptype, argnum, i in macro.patch: - # Concatenation. Argument is left unexpanded - if ptype == "c": - rep[i : i + 1] = args[argnum] - # Normal expansion. Argument is macro expanded first - elif ptype == "e": - if argnum not in expanded: - expanded[argnum] = self.expand_macros(args[argnum]) - rep[i : i + 1] = expanded[argnum] - - # Get rid of removed comma if necessary - if comma_patch: - rep = [_i for _i in rep if _i] - - return rep - - # ---------------------------------------------------------------------- - # expand_macros() - # - # Given a list of tokens, this function performs macro expansion. - # The expanded argument is a dictionary that contains macros already - # expanded. This is used to prevent infinite recursion. 
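The "expanded" bookkeeping described here is what stops self-referential macros from looping forever. A tiny standalone illustration of that guard (hypothetical macro table, not this module's code):

    # Hypothetical macro table: A and B reference each other.
    macros = {"A": ["B", "+", "1"], "B": ["A", "*", "2"]}

    def expand(tokens, active=frozenset()):
        out = []
        for t in tokens:
            if t in macros and t not in active:
                # Expand, but refuse to re-expand anything already being expanded.
                out.extend(expand(macros[t], active | {t}))
            else:
                out.append(t)
        return out

    print(expand(["A"]))   # ['A', '*', '2', '+', '1'] -- the inner 'A' is left alone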
- # ---------------------------------------------------------------------- - - def expand_macros(self, tokens, expanded=None): - if expanded is None: - expanded = {} - i = 0 - while i < len(tokens): - t = tokens[i] - if t.type == self.t_ID: - if t.value in self.macros and t.value not in expanded: - # Yes, we found a macro match - expanded[t.value] = True - - m = self.macros[t.value] - if not m.arglist: - # A simple macro - ex = self.expand_macros( - [copy.copy(_x) for _x in m.value], expanded - ) - for e in ex: - e.lineno = t.lineno - tokens[i : i + 1] = ex - i += len(ex) - else: - # A macro with arguments - j = i + 1 - while j < len(tokens) and tokens[j].type in self.t_WS: - j += 1 - if tokens[j].value == "(": - tokcount, args, positions = self.collect_args(tokens[j:]) - if not m.variadic and len(args) != len(m.arglist): - self.error( - self.source, - t.lineno, - "Macro %s requires %d arguments" - % (t.value, len(m.arglist)), - ) - i = j + tokcount - elif m.variadic and len(args) < len(m.arglist) - 1: - if len(m.arglist) > 2: - self.error( - self.source, - t.lineno, - "Macro %s must have at least %d arguments" - % (t.value, len(m.arglist) - 1), - ) - else: - self.error( - self.source, - t.lineno, - "Macro %s must have at least %d argument" - % (t.value, len(m.arglist) - 1), - ) - i = j + tokcount - else: - if m.variadic: - if len(args) == len(m.arglist) - 1: - args.append([]) - else: - args[len(m.arglist) - 1] = tokens[ - j - + positions[len(m.arglist) - 1] : j - + tokcount - - 1 - ] - del args[len(m.arglist) :] - - # Get macro replacement text - rep = self.macro_expand_args(m, args) - rep = self.expand_macros(rep, expanded) - for r in rep: - r.lineno = t.lineno - tokens[i : j + tokcount] = rep - i += len(rep) - del expanded[t.value] - continue - elif t.value == "__LINE__": - t.type = self.t_INTEGER - t.value = self.t_INTEGER_TYPE(t.lineno) - - i += 1 - return tokens - - # ---------------------------------------------------------------------- - # evalexpr() - # - # Evaluate an expression token sequence for the purposes of evaluating - # integral expressions. 
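evalexpr(), defined next, reduces a conditional directive to a Python expression: macros are expanded, leftover identifiers become 0, integer suffixes are stripped, and the C logical operators are rewritten before eval(). A rough standalone illustration of that last rewriting step:

    # Roughly the replace() chain evalexpr() applies to the joined token string.
    expr = "2 > 1 && !(3 > 4)"
    expr = expr.replace("&&", " and ").replace("||", " or ").replace("!", " not ")
    print(eval(expr))   # True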
- # ---------------------------------------------------------------------- - - def evalexpr(self, tokens): - # tokens = tokenize(line) - # Search for defined macros - i = 0 - while i < len(tokens): - if tokens[i].type == self.t_ID and tokens[i].value == "defined": - j = i + 1 - needparen = False - result = "0L" - while j < len(tokens): - if tokens[j].type in self.t_WS: - j += 1 - continue - elif tokens[j].type == self.t_ID: - if tokens[j].value in self.macros: - result = "1L" - else: - result = "0L" - if not needparen: - break - elif tokens[j].value == "(": - needparen = True - elif tokens[j].value == ")": - break - else: - self.error(self.source, tokens[i].lineno, "Malformed defined()") - j += 1 - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE(result) - del tokens[i + 1 : j + 1] - i += 1 - tokens = self.expand_macros(tokens) - for i, t in enumerate(tokens): - if t.type == self.t_ID: - tokens[i] = copy.copy(t) - tokens[i].type = self.t_INTEGER - tokens[i].value = self.t_INTEGER_TYPE("0L") - elif t.type == self.t_INTEGER: - tokens[i] = copy.copy(t) - # Strip off any trailing suffixes - tokens[i].value = str(tokens[i].value) - while tokens[i].value[-1] not in "0123456789abcdefABCDEF": - tokens[i].value = tokens[i].value[:-1] - - expr = "".join([str(x.value) for x in tokens]) - expr = expr.replace("&&", " and ") - expr = expr.replace("||", " or ") - expr = expr.replace("!", " not ") - try: - result = eval(expr) - except Exception: - self.error(self.source, tokens[0].lineno, "Couldn't evaluate expression") - result = 0 - return result - - # ---------------------------------------------------------------------- - # parsegen() - # - # Parse an input string/ - # ---------------------------------------------------------------------- - def parsegen(self, input, source=None): - - # Replace trigraph sequences - t = trigraph(input) - lines = self.group_lines(t) - - if not source: - source = "" - - self.define('__FILE__ "%s"' % source) - - self.source = source - chunk = [] - enable = True - iftrigger = False - ifstack = [] - - for x in lines: - for i, tok in enumerate(x): - if tok.type not in self.t_WS: - break - if tok.value == "#": - # Preprocessor directive - - # insert necessary whitespace instead of eaten tokens - for tok in x: - if tok.type in self.t_WS and "\n" in tok.value: - chunk.append(tok) - - dirtokens = self.tokenstrip(x[i + 1 :]) - if dirtokens: - name = dirtokens[0].value - args = self.tokenstrip(dirtokens[1:]) - else: - name = "" - args = [] - - if name == "define": - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.define(args) - elif name == "include": - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - oldfile = self.macros["__FILE__"] - for tok in self.include(args): - yield tok - self.macros["__FILE__"] = oldfile - self.source = source - elif name == "undef": - if enable: - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - self.undef(args) - elif name == "ifdef": - ifstack.append((enable, iftrigger)) - if enable: - if not args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == "ifndef": - ifstack.append((enable, iftrigger)) - if enable: - if args[0].value in self.macros: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == "if": - ifstack.append((enable, iftrigger)) - if enable: - result = self.evalexpr(args) - if not result: - enable = False - iftrigger = False - else: - iftrigger = True - elif name == 
"elif": - if ifstack: - if ifstack[-1][ - 0 - ]: # We only pay attention if outer "if" allows this - if enable: # If already true, we flip enable False - enable = False - elif ( - not iftrigger - ): # If False, but not triggered yet, we'll check expression - result = self.evalexpr(args) - if result: - enable = True - iftrigger = True - else: - self.error(self.source, dirtokens[0].lineno, "Misplaced #elif") - - elif name == "else": - if ifstack: - if ifstack[-1][0]: - if enable: - enable = False - elif not iftrigger: - enable = True - iftrigger = True - else: - self.error(self.source, dirtokens[0].lineno, "Misplaced #else") - - elif name == "endif": - if ifstack: - enable, iftrigger = ifstack.pop() - else: - self.error(self.source, dirtokens[0].lineno, "Misplaced #endif") - else: - # Unknown preprocessor directive - pass - - else: - # Normal text - if enable: - chunk.extend(x) - - for tok in self.expand_macros(chunk): - yield tok - chunk = [] - - # ---------------------------------------------------------------------- - # include() - # - # Implementation of file-inclusion - # ---------------------------------------------------------------------- - - def include(self, tokens): - # Try to extract the filename and then process an include file - if not tokens: - return - if tokens: - if tokens[0].value != "<" and tokens[0].type != self.t_STRING: - tokens = self.expand_macros(tokens) - - if tokens[0].value == "<": - # Include <...> - i = 1 - while i < len(tokens): - if tokens[i].value == ">": - break - i += 1 - else: - print("Malformed #include <...>") - return - filename = "".join([x.value for x in tokens[1:i]]) - path = self.path + [""] + self.temp_path - elif tokens[0].type == self.t_STRING: - filename = tokens[0].value[1:-1] - path = self.temp_path + [""] + self.path - else: - print("Malformed #include statement") - return - for p in path: - iname = os.path.join(p, filename) - try: - data = open(iname, "r").read() - dname = os.path.dirname(iname) - if dname: - self.temp_path.insert(0, dname) - for tok in self.parsegen(data, filename): - yield tok - if dname: - del self.temp_path[0] - break - except IOError: - pass - else: - print("Couldn't find '%s'" % filename) - - # ---------------------------------------------------------------------- - # define() - # - # Define a new macro - # ---------------------------------------------------------------------- - - def define(self, tokens): - if isinstance(tokens, STRING_TYPES): - tokens = self.tokenize(tokens) - - linetok = tokens - try: - name = linetok[0] - if len(linetok) > 1: - mtype = linetok[1] - else: - mtype = None - if not mtype: - m = Macro(name.value, []) - self.macros[name.value] = m - elif mtype.type in self.t_WS: - # A normal macro - m = Macro(name.value, self.tokenstrip(linetok[2:])) - self.macros[name.value] = m - elif mtype.value == "(": - # A macro with arguments - tokcount, args, positions = self.collect_args(linetok[1:]) - variadic = False - for a in args: - if variadic: - print("No more arguments may follow a variadic argument") - break - astr = "".join([str(_i.value) for _i in a]) - if astr == "...": - variadic = True - a[0].type = self.t_ID - a[0].value = "__VA_ARGS__" - variadic = True - del a[1:] - continue - elif astr[-3:] == "..." and a[0].type == self.t_ID: - variadic = True - del a[1:] - # If, for some reason, "." 
is part of the identifier, strip off the name for the purposes - # of macro expansion - if a[0].value[-3:] == "...": - a[0].value = a[0].value[:-3] - continue - if len(a) > 1 or a[0].type != self.t_ID: - print("Invalid macro argument") - break - else: - mvalue = self.tokenstrip(linetok[1 + tokcount :]) - i = 0 - while i < len(mvalue): - if i + 1 < len(mvalue): - if ( - mvalue[i].type in self.t_WS - and mvalue[i + 1].value == "##" - ): - del mvalue[i] - continue - elif ( - mvalue[i].value == "##" - and mvalue[i + 1].type in self.t_WS - ): - del mvalue[i + 1] - i += 1 - m = Macro(name.value, mvalue, [x[0].value for x in args], variadic) - self.macro_prescan(m) - self.macros[name.value] = m - else: - print("Bad macro definition") - except LookupError: - print("Bad macro definition") - - # ---------------------------------------------------------------------- - # undef() - # - # Undefine a macro - # ---------------------------------------------------------------------- - - def undef(self, tokens): - id = tokens[0].value - try: - del self.macros[id] - except LookupError: - pass - - # ---------------------------------------------------------------------- - # parse() - # - # Parse input text. - # ---------------------------------------------------------------------- - def parse(self, input, source=None, ignore={}): - self.ignore = ignore - self.parser = self.parsegen(input, source) - - # ---------------------------------------------------------------------- - # token() - # - # Method to return individual tokens - # ---------------------------------------------------------------------- - def token(self): - try: - while True: - tok = next(self.parser) - if tok.type not in self.ignore: - return tok - except StopIteration: - self.parser = None - return None - - -if __name__ == "__main__": - import ply.lex as lex - - lexer = lex.lex() - - # Run a preprocessor - import sys - - f = open(sys.argv[1]) - input = f.read() - - p = Preprocessor(lexer) - p.parse(input, sys.argv[1]) - while True: - tok = p.token() - if not tok: - break - print(p.source, tok) diff --git a/src/pycparser/ply/ctokens.py b/src/pycparser/ply/ctokens.py deleted file mode 100755 index cf9ef71e..00000000 --- a/src/pycparser/ply/ctokens.py +++ /dev/null @@ -1,156 +0,0 @@ -# ---------------------------------------------------------------------- -# ctokens.py -# -# Token specifications for symbols in ANSI C and C++. This file is -# meant to be used as a library in other tokenizers. -# ---------------------------------------------------------------------- - -# Reserved words - -tokens = [ - # Literals (identifier, integer constant, float constant, string constant, char const) - "ID", - "TYPEID", - "INTEGER", - "FLOAT", - "STRING", - "CHARACTER", - # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=) - "PLUS", - "MINUS", - "TIMES", - "DIVIDE", - "MODULO", - "OR", - "AND", - "NOT", - "XOR", - "LSHIFT", - "RSHIFT", - "LOR", - "LAND", - "LNOT", - "LT", - "LE", - "GT", - "GE", - "EQ", - "NE", - # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=) - "EQUALS", - "TIMESEQUAL", - "DIVEQUAL", - "MODEQUAL", - "PLUSEQUAL", - "MINUSEQUAL", - "LSHIFTEQUAL", - "RSHIFTEQUAL", - "ANDEQUAL", - "XOREQUAL", - "OREQUAL", - # Increment/decrement (++,--) - "INCREMENT", - "DECREMENT", - # Structure dereference (->) - "ARROW", - # Ternary operator (?) - "TERNARY", - # Delimeters ( ) [ ] { } , . ; : - "LPAREN", - "RPAREN", - "LBRACKET", - "RBRACKET", - "LBRACE", - "RBRACE", - "COMMA", - "PERIOD", - "SEMI", - "COLON", - # Ellipsis (...) 
- "ELLIPSIS", -] - -# Operators -t_PLUS = r"\+" -t_MINUS = r"-" -t_TIMES = r"\*" -t_DIVIDE = r"/" -t_MODULO = r"%" -t_OR = r"\|" -t_AND = r"&" -t_NOT = r"~" -t_XOR = r"\^" -t_LSHIFT = r"<<" -t_RSHIFT = r">>" -t_LOR = r"\|\|" -t_LAND = r"&&" -t_LNOT = r"!" -t_LT = r"<" -t_GT = r">" -t_LE = r"<=" -t_GE = r">=" -t_EQ = r"==" -t_NE = r"!=" - -# Assignment operators - -t_EQUALS = r"=" -t_TIMESEQUAL = r"\*=" -t_DIVEQUAL = r"/=" -t_MODEQUAL = r"%=" -t_PLUSEQUAL = r"\+=" -t_MINUSEQUAL = r"-=" -t_LSHIFTEQUAL = r"<<=" -t_RSHIFTEQUAL = r">>=" -t_ANDEQUAL = r"&=" -t_OREQUAL = r"\|=" -t_XOREQUAL = r"\^=" - -# Increment/decrement -t_INCREMENT = r"\+\+" -t_DECREMENT = r"--" - -# -> -t_ARROW = r"->" - -# ? -t_TERNARY = r"\?" - -# Delimeters -t_LPAREN = r"\(" -t_RPAREN = r"\)" -t_LBRACKET = r"\[" -t_RBRACKET = r"\]" -t_LBRACE = r"\{" -t_RBRACE = r"\}" -t_COMMA = r"," -t_PERIOD = r"\." -t_SEMI = r";" -t_COLON = r":" -t_ELLIPSIS = r"\.\.\." - -# Identifiers -t_ID = r"[A-Za-z_][A-Za-z0-9_]*" - -# Integer literal -t_INTEGER = r"\d+([uU]|[lL]|[uU][lL]|[lL][uU])?" - -# Floating literal -t_FLOAT = r"((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?" - -# String literal -t_STRING = r"\"([^\\\n]|(\\.))*?\"" - -# Character constant 'c' or L'c' -t_CHARACTER = r"(L)?\'([^\\\n]|(\\.))*?\'" - -# Comment (C-Style) -def t_COMMENT(t): - t.lexer.lineno += t.value.count("\n") - return t - - -# Comment (C++-Style) -def t_CPPCOMMENT(t): - t.lexer.lineno += 1 - return t diff --git a/src/pycparser/ply/lex.py b/src/pycparser/ply/lex.py deleted file mode 100755 index 1047c92c..00000000 --- a/src/pycparser/ply/lex.py +++ /dev/null @@ -1,1235 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: lex.py -# -# Copyright (C) 2001-2017 -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ----------------------------------------------------------------------------- - -__version__ = "3.10" -__tabversion__ = "3.10" - -import copy -import inspect -import os -import re -import sys -import types - -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) - -# This regular expression is used to match valid token names -_is_identifier = re.compile(r"^[a-zA-Z0-9_]+$") - -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): - def __init__(self, message, s): - self.args = (message,) - self.text = s - - -# Token class. This class is used to represent the tokens produced. -class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % ( - self.type, - self.value, - self.lineno, - self.lexpos, - ) - - def __repr__(self): - return str(self) - - -# This object is a stand-in for a logging object created by the -# logging module. - - -class PlyLogger(object): - def __init__(self, f): - self.f = f - - def critical(self, msg, *args, **kwargs): - self.f.write((msg % args) + "\n") - - def warning(self, msg, *args, **kwargs): - self.f.write("WARNING: " + (msg % args) + "\n") - - def error(self, msg, *args, **kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") - - info = critical - debug = critical - - -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self, name): - return self - - def __call__(self, *args, **kwargs): - return self - - -# ----------------------------------------------------------------------------- -# === Lexing Engine === -# -# The following Lexer class implements the lexer runtime. There are only -# a few public methods and attributes: -# -# input() - Store a new string in the lexer -# token() - Get the next token -# clone() - Clone the lexer -# -# lineno - Current line number -# lexpos - Current position in the input string -# ----------------------------------------------------------------------------- - - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. 
This is a list of - # tuples (re, findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexstateeoff = {} # Dictionary of eof functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lexeoff = None # EOF rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexoptimize = False # Optimized mode - - def clone(self, object=None): - c = copy.copy(self) - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - - if object: - newtab = {} - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object, f[0].__name__), f[1])) - newre.append((cre, newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = {} - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object, ef.__name__) - c.lexmodule = object - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self, lextab, outputdir=""): - if isinstance(lextab, types.ModuleType): - raise IOError("Won't overwrite existing lextab module") - basetabmodule = lextab.split(".")[-1] - filename = os.path.join(outputdir, basetabmodule) + ".py" - with open(filename, "w") as tf: - tf.write( - "# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" - % (basetabmodule, __version__) - ) - tf.write("_tabversion = %s\n" % repr(__tabversion__)) - tf.write("_lextokens = set(%s)\n" % repr(tuple(self.lextokens))) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - # Rewrite the lexstatere table, replacing function objects with function names - tabre = {} - for statename, lre in self.lexstatere.items(): - titem = [] - for (pat, func), retext, renames in zip( - lre, self.lexstateretext[statename], self.lexstaterenames[statename] - ): - titem.append((retext, _funcs_to_names(func, renames))) - tabre[statename] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = {} - for statename, ef in self.lexstateerrorf.items(): - taberr[statename] = ef.__name__ if ef else None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - - tabeof = {} - for statename, ef in self.lexstateeoff.items(): - tabeof[statename] = ef.__name__ if ef else None - tf.write("_lexstateeoff = %s\n" % repr(tabeof)) - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self, tabfile, fdict): - if isinstance(tabfile, types.ModuleType): - lextab = tabfile - else: - exec("import %s" % tabfile) - lextab = sys.modules[tabfile] - - if getattr(lextab, "_tabversion", "0.0") != __tabversion__: - raise ImportError("Inconsistent PLY version") - - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lextokens_all = self.lextokens | set(self.lexliterals) - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = {} - self.lexstateretext = {} - for statename, lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for pat, func_name in lre: - titem.append( - ( - re.compile(pat, lextab._lexreflags), - _names_to_funcs(func_name, fdict), - ) - ) - - self.lexstatere[statename] = titem - self.lexstateretext[statename] = txtitem - - self.lexstateerrorf = {} - for statename, ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[statename] = fdict[ef] - - self.lexstateeoff = {} - for statename, ef in lextab._lexstateeoff.items(): - self.lexstateeoff[statename] = fdict[ef] - - self.begin("INITIAL") - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self, s): - # Pull off the first character to see if s looks like a string - c = s[:1] - if not isinstance(c, StringTypes): - raise ValueError("Expected a string") - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self, state): - if state not in self.lexstatere: - raise ValueError("Undefined state") - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state, "") - self.lexerrorf = self.lexstateerrorf.get(state, None) - self.lexeoff = self.lexstateeoff.get(state, None) - self.lexstate = state - - # ------------------------------------------------------------ - # 
push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self, state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self, n): - self.lexpos += n - - # ------------------------------------------------------------ - # opttoken() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre, lexindexfunc in self.lexre: - m = lexre.match(lexdata, lexpos) - if not m: - continue - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - - i = m.lastindex - func, tok.type = lexindexfunc[i] - - if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break - - lexpos = m.end() - - # If token is processed by a function, call it - - tok.lexer = self # Set additional attributes useful in token rules - self.lexmatch = m - self.lexpos = lexpos - - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = ( - self.lexpos - ) # This is here in case user has updated lexpos. - lexignore = ( - self.lexignore - ) # This is here in case there was a state change - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if newtok.type not in self.lextokens_all: - raise LexError( - "%s:%d: Rule '%s' returned an unknown token type '%s'" - % ( - func.__code__.co_filename, - func.__code__.co_firstlineno, - func.__name__, - newtok.type, - ), - lexdata[lexpos:], - ) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = "error" - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. 
- raise LexError( - "Scanning error. Illegal character '%s'" - % (lexdata[lexpos]), - lexdata[lexpos:], - ) - lexpos = self.lexpos - if not newtok: - continue - return newtok - - self.lexpos = lexpos - raise LexError( - "Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), - lexdata[lexpos:], - ) - - if self.lexeoff: - tok = LexToken() - tok.type = "eof" - tok.value = "" - tok.lineno = self.lineno - tok.lexpos = lexpos - tok.lexer = self - self.lexpos = lexpos - newtok = self.lexeoff(tok) - return newtok - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError("No input string given with input()") - return None - - # Iterator interface - def __iter__(self): - return self - - def next(self): - t = self.token() - if t is None: - raise StopIteration - return t - - __next__ = next - - -# ----------------------------------------------------------------------------- -# ==== Lex Builder === -# -# The functions and classes below are used to collect lexing information -# and build a Lexer object from it. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# _get_regex(func) -# -# Returns the regular expression assigned to a function either as a doc string -# or as a .regex attribute attached by the @TOKEN decorator. -# ----------------------------------------------------------------------------- -def _get_regex(func): - return getattr(func, "regex", func.__doc__) - - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- -def get_caller_module_dict(levels): - f = sys._getframe(levels) - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - return ldict - - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- -def _funcs_to_names(funclist, namelist): - result = [] - for f, name in zip(funclist, namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) - return result - - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- -def _names_to_funcs(namelist, fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]], n[1])) - else: - result.append(n) - return result - - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. 
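For orientation while reading the removed lexer engine above: `input()`, `token()`, the ignore short-circuit, and the `t_error` handling are all driven by a user-supplied rule module. A minimal, illustrative sketch of such a specification, assuming the vendored PLY is importable as `ply.lex`; the token names and sample input are invented for the example:

```python
import ply.lex as lex

tokens = ("NUMBER", "PLUS", "ID")     # hypothetical token set

t_PLUS = r"\+"                        # string rule: pattern only, no action
t_ignore = " \t"                      # consumed by the ignore short-circuit in token()

def t_NUMBER(t):                      # function rule: regex in the docstring, action in the body
    r"\d+"
    t.value = int(t.value)
    return t

def t_ID(t):
    r"[A-Za-z_]\w*"
    return t

def t_error(t):                       # called when no rule and no literal matches
    print("Illegal character %r" % t.value[0])
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input("x + 42")
for tok in lexer:                     # iterator interface defined in the engine above
    print(tok.type, tok.value, tok.lexpos)
```

This is only a sketch of the conventional PLY interface; in this repository the actual consumer of the engine is pycparser's own `c_lexer.py`.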
-# ----------------------------------------------------------------------------- -def _form_master_re(relist, reflags, ldict, toknames): - if not relist: - return [] - regex = "|".join(relist) - try: - lexre = re.compile(regex, reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) - lexindexnames = lexindexfunc[:] - - for f, i in lexre.groupindex.items(): - handle = ldict.get(f, None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle, toknames[f]) - lexindexnames[i] = f - elif handle is not None: - lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None, None) - else: - lexindexfunc[i] = (None, toknames[f]) - - return [(lexre, lexindexfunc)], [regex], [lexindexnames] - except Exception: - m = int(len(relist) / 2) - if m == 0: - m = 1 - llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) - rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) - return (llist + rlist), (lre + rre), (lnames + rnames) - - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- -def _statetoken(s, names): - nonstate = 1 - parts = s.split("_") - for i, part in enumerate(parts[1:], 1): - if part not in names and part != "ANY": - break - - if i > 1: - states = tuple(parts[1:i]) - else: - states = ("INITIAL",) - - if "ANY" in states: - states = tuple(names) - - tokenname = "_".join(parts[i:]) - return (states, tokenname) - - -# ----------------------------------------------------------------------------- -# LexerReflect() -# -# This class represents information needed to build a lexer as extracted from a -# user's input file. 
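`_form_master_re()` above folds every rule pattern into a single alternation of named groups so the matching loop can try all rules with one `re.match`, recursing on halves of the rule list if Python's `re` refuses the combined expression; `_statetoken()` then just splits names like `t_foo_bar_SPAM` into the states `('foo', 'bar')` and the token `SPAM`. A standalone sketch of the named-group technique, with rule names and patterns invented for illustration:

```python
import re

# Hypothetical per-rule patterns, in the priority order the lexer would use.
rules = [("NUMBER", r"\d+"), ("ID", r"[A-Za-z_]\w*"), ("PLUS", r"\+")]

# One master regex: each rule becomes a named group, mirroring _form_master_re().
master = re.compile("|".join("(?P<%s>%s)" % (name, pat) for name, pat in rules))

m = master.match("count42 + 7")
print(m.lastgroup, m.group())   # -> ID count42 ; the winning group names the rule
```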
-# ----------------------------------------------------------------------------- -class LexerReflect(object): - def __init__(self, ldict, log=None, reflags=0): - self.ldict = ldict - self.error_func = None - self.tokens = [] - self.reflags = reflags - self.stateinfo = {"INITIAL": "inclusive"} - self.modules = set() - self.error = False - self.log = PlyLogger(sys.stderr) if log is None else log - - # Get all of the basic information - def get_all(self): - self.get_tokens() - self.get_literals() - self.get_states() - self.get_rules() - - # Validate all of the information - def validate_all(self): - self.validate_tokens() - self.validate_literals() - self.validate_rules() - return self.error - - # Get the tokens map - def get_tokens(self): - tokens = self.ldict.get("tokens", None) - if not tokens: - self.log.error("No token list is defined") - self.error = True - return - - if not isinstance(tokens, (list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = True - return - - if not tokens: - self.log.error("tokens is empty") - self.error = True - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - terminals = {} - for n in self.tokens: - if not _is_identifier.match(n): - self.log.error("Bad token name '%s'", n) - self.error = True - if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 - - # Get the literals specifier - def get_literals(self): - self.literals = self.ldict.get("literals", "") - if not self.literals: - self.literals = "" - - # Validate literals - def validate_literals(self): - try: - for c in self.literals: - if not isinstance(c, StringTypes) or len(c) > 1: - self.log.error( - "Invalid literal %s. Must be a single character", repr(c) - ) - self.error = True - - except TypeError: - self.log.error( - "Invalid literals specification. literals must be a sequence of characters" - ) - self.error = True - - def get_states(self): - self.states = self.ldict.get("states", None) - # Build statemap - if self.states: - if not isinstance(self.states, (tuple, list)): - self.log.error("states must be defined as a tuple or list") - self.error = True - else: - for s in self.states: - if not isinstance(s, tuple) or len(s) != 2: - self.log.error( - "Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')", - repr(s), - ) - self.error = True - continue - name, statetype = s - if not isinstance(name, StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = True - continue - if not (statetype == "inclusive" or statetype == "exclusive"): - self.log.error( - "State type for state %s must be 'inclusive' or 'exclusive'", - name, - ) - self.error = True - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined", name) - self.error = True - continue - self.stateinfo[name] = statetype - - # Get all of the symbols with a t_ prefix and sort them into various - # categories (functions, strings, error functions, and ignore characters) - - def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == "t_"] - - # Now build up a list of functions and a list of strings - self.toknames = {} # Mapping of symbols to token names - self.funcsym = {} # Symbols defined as functions - self.strsym = {} # Symbols defined as strings - self.ignore = {} # Ignore strings by state - self.errorf = {} # Error functions by state - self.eoff = {} # EOF functions by state - - for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] - - if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = True - return - - for f in tsymbols: - t = self.ldict[f] - states, tokname = _statetoken(f, self.stateinfo) - self.toknames[f] = tokname - - if hasattr(t, "__call__"): - if tokname == "error": - for s in states: - self.errorf[s] = t - elif tokname == "eof": - for s in states: - self.eoff[s] = t - elif tokname == "ignore": - line = t.__code__.co_firstlineno - file = t.__code__.co_filename - self.log.error( - "%s:%d: Rule '%s' must be defined as a string", - file, - line, - t.__name__, - ) - self.error = True - else: - for s in states: - self.funcsym[s].append((f, t)) - elif isinstance(t, StringTypes): - if tokname == "ignore": - for s in states: - self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'", f) - - elif tokname == "error": - self.log.error("Rule '%s' must be defined as a function", f) - self.error = True - else: - for s in states: - self.strsym[s].append((f, t)) - else: - self.log.error("%s not defined as a function or string", f) - self.error = True - - # Sort the functions by line number - for f in self.funcsym.values(): - f.sort(key=lambda x: x[1].__code__.co_firstlineno) - - # Sort the strings by regular expression length - for s in self.strsym.values(): - s.sort(key=lambda x: len(x[1]), reverse=True) - - # Validate all of the t_rules collected - def validate_rules(self): - for state in self.stateinfo: - # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - tokname = self.toknames[fname] - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error( - "%s:%d: Rule '%s' has too many arguments", - file, - line, - f.__name__, - ) - self.error = True - continue - - if nargs < reqargs: - self.log.error( - "%s:%d: Rule '%s' requires an argument", file, line, f.__name__ - ) - self.error = True - continue - - if not _get_regex(f): - self.log.error( - "%s:%d: No regular expression defined for rule '%s'", - file, - line, - f.__name__, - ) - self.error = True - continue - - try: - c = 
re.compile("(?P<%s>%s)" % (fname, _get_regex(f)), self.reflags) - if c.match(""): - self.log.error( - "%s:%d: Regular expression for rule '%s' matches empty string", - file, - line, - f.__name__, - ) - self.error = True - except re.error as e: - self.log.error( - "%s:%d: Invalid regular expression for rule '%s'. %s", - file, - line, - f.__name__, - e, - ) - if "#" in _get_regex(f): - self.log.error( - "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", - file, - line, - f.__name__, - ) - self.error = True - - # Validate all rules defined by strings - for name, r in self.strsym[state]: - tokname = self.toknames[name] - if tokname == "error": - self.log.error("Rule '%s' must be defined as a function", name) - self.error = True - continue - - if tokname not in self.tokens and tokname.find("ignore_") < 0: - self.log.error( - "Rule '%s' defined for an unspecified token %s", name, tokname - ) - self.error = True - continue - - try: - c = re.compile("(?P<%s>%s)" % (name, r), self.reflags) - if c.match(""): - self.log.error( - "Regular expression for rule '%s' matches empty string", - name, - ) - self.error = True - except re.error as e: - self.log.error( - "Invalid regular expression for rule '%s'. %s", name, e - ) - if "#" in r: - self.log.error( - "Make sure '#' in rule '%s' is escaped with '\\#'", name - ) - self.error = True - - if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'", state) - self.error = True - - # Validate the error function - efunc = self.errorf.get(state, None) - if efunc: - f = efunc - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - module = inspect.getmodule(f) - self.modules.add(module) - - if isinstance(f, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - nargs = f.__code__.co_argcount - if nargs > reqargs: - self.log.error( - "%s:%d: Rule '%s' has too many arguments", - file, - line, - f.__name__, - ) - self.error = True - - if nargs < reqargs: - self.log.error( - "%s:%d: Rule '%s' requires an argument", file, line, f.__name__ - ) - self.error = True - - for module in self.modules: - self.validate_module(module) - - # ----------------------------------------------------------------------------- - # validate_module() - # - # This checks to see if there are duplicated t_rulename() functions or strings - # in the parser input file. This is done using a simple regular expression - # match on each line in the source code of the given module. - # ----------------------------------------------------------------------------- - - def validate_module(self, module): - try: - lines, linen = inspect.getsourcelines(module) - except IOError: - return - - fre = re.compile(r"\s*def\s+(t_[a-zA-Z_0-9]*)\(") - sre = re.compile(r"\s*(t_[a-zA-Z_0-9]*)\s*=") - - counthash = {} - linen += 1 - for line in lines: - m = fre.match(line) - if not m: - m = sre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - filename = inspect.getsourcefile(module) - self.log.error( - "%s:%d: Rule %s redefined. 
Previously defined on line %d", - filename, - linen, - name, - prev, - ) - self.error = True - linen += 1 - - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex( - module=None, - object=None, - debug=False, - optimize=False, - lextab="lextab", - reflags=int(re.VERBOSE), - nowarn=False, - outputdir=None, - debuglog=None, - errorlog=None, -): - - if lextab is None: - lextab = "lextab" - - global lexer - - ldict = None - stateinfo = {"INITIAL": "inclusive"} - lexobj = Lexer() - lexobj.lexoptimize = optimize - global token, input - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - if debug: - if debuglog is None: - debuglog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the lexer - if object: - module = object - - # Get the module dictionary used for the parser - if module: - _items = [(k, getattr(module, k)) for k in dir(module)] - ldict = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead - if "__file__" not in ldict: - ldict["__file__"] = sys.modules[ldict["__module__"]].__file__ - else: - ldict = get_caller_module_dict(2) - - # Determine if the module is package of a package or not. - # If so, fix the tabmodule setting so that tables load correctly - pkg = ldict.get("__package__") - if pkg and isinstance(lextab, str): - if "." not in lextab: - lextab = pkg + "." + lextab - - # Collect parser information from the dictionary - linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) - linfo.get_all() - if not optimize: - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - if optimize and lextab: - try: - lexobj.readtab(lextab, ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Dump some basic debugging information - if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) - - # Build a dictionary of valid token names - lexobj.lextokens = set() - for n in linfo.tokens: - lexobj.lextokens.add(n) - - # Get literals specification - if isinstance(linfo.literals, (list, tuple)): - lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) - else: - lexobj.lexliterals = linfo.literals - - lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) - - # Get the stateinfo dictionary - stateinfo = linfo.stateinfo - - regexs = {} - # Build the master regular expressions - for state in stateinfo: - regex_list = [] - - # Add rules defined by functions first - for fname, f in linfo.funcsym[state]: - line = f.__code__.co_firstlineno - file = f.__code__.co_filename - regex_list.append("(?P<%s>%s)" % (fname, _get_regex(f))) - if debug: - debuglog.info( - "lex: Adding rule %s -> '%s' (state '%s')", - fname, - _get_regex(f), - state, - ) - - # Now add all of the simple rules - for name, r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name, r)) - if debug: - debuglog.info( - "lex: Adding rule %s -> '%s' (state '%s')", name, r, state - ) - - regexs[state] = regex_list - - # Build the master regular expressions - - if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") - - for state in regexs: - lexre, re_text, re_names = _form_master_re( - regexs[state], reflags, ldict, 
linfo.toknames - ) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - lexobj.lexstaterenames[state] = re_names - if debug: - for i, text in enumerate(re_text): - debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) - - # For inclusive states, we need to add the regular expressions from the INITIAL state - for state, stype in stateinfo.items(): - if state != "INITIAL" and stype == "inclusive": - lexobj.lexstatere[state].extend(lexobj.lexstatere["INITIAL"]) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext["INITIAL"]) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames["INITIAL"]) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] - lexobj.lexreflags = reflags - - # Set up ignore variables - lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL", "") - - # Set up error functions - lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL", None) - if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") - - # Set up eof functions - lexobj.lexstateeoff = linfo.eoff - lexobj.lexeoff = linfo.eoff.get("INITIAL", None) - - # Check state information for ignore and error rules - for s, stype in stateinfo.items(): - if stype == "exclusive": - if s not in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if s not in linfo.ignore and lexobj.lexignore: - errorlog.warning( - "No ignore rule is defined for exclusive state '%s'", s - ) - elif stype == "inclusive": - if s not in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL", None) - if s not in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL", "") - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - if outputdir is None: - # If no output directory is set, the location of the output files - # is determined according to the following rules: - # - If lextab specifies a package, files go into that package directory - # - Otherwise, files go in the same directory as the specifying module - if isinstance(lextab, types.ModuleType): - srcfile = lextab.__file__ - else: - if "." not in lextab: - srcfile = ldict["__file__"] - else: - parts = lextab.split(".") - pkgname = ".".join(parts[:-1]) - exec("import %s" % pkgname) - srcfile = getattr(sys.modules[pkgname], "__file__", "") - outputdir = os.path.dirname(srcfile) - try: - lexobj.writetab(lextab, outputdir) - except IOError as e: - errorlog.warning("Couldn't write lextab module %r. 
%s" % (lextab, e)) - - return lexobj - - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - - -def runmain(lexer=None, data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while True: - tok = _token() - if not tok: - break - sys.stdout.write( - "(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno, tok.lexpos) - ) - - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - - -def TOKEN(r): - def set_regex(f): - if hasattr(r, "__call__"): - f.regex = _get_regex(r) - else: - f.regex = r - return f - - return set_regex - - -# Alternative spelling of the TOKEN decorator -Token = TOKEN diff --git a/src/pycparser/ply/lex.pyc b/src/pycparser/ply/lex.pyc deleted file mode 100644 index 62f35979..00000000 Binary files a/src/pycparser/ply/lex.pyc and /dev/null differ diff --git a/src/pycparser/ply/yacc.py b/src/pycparser/ply/yacc.py deleted file mode 100755 index efb9dabc..00000000 --- a/src/pycparser/ply/yacc.py +++ /dev/null @@ -1,3741 +0,0 @@ -# ----------------------------------------------------------------------------- -# ply: yacc.py -# -# Copyright (C) 2001-2017 -# David M. Beazley (Dabeaz LLC) -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to -# endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ----------------------------------------------------------------------------- -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. 
The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -import base64 -import inspect -import os.path -import re -import sys -import types -import warnings - -__version__ = "3.10" -__tabversion__ = "3.10" - -# ----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -# ----------------------------------------------------------------------------- - -yaccdebug = True # Debugging mode. If set, yacc generates a -# a 'parser.out' file in the current directory - -debug_file = "parser.out" # Default name of the debugging file -tab_module = "parsetab" # Default name of the table module -default_lr = "LALR" # Default LR table generation method - -error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -yaccdevel = False # Set to True if developing yacc. This turns off optimized -# implementations of certain functions. - -resultlimit = 40 # Size limit of results when running in debug mode. - -pickle_protocol = 0 # Protocol to use when writing pickle files - -# String type-checking compatibility -if sys.version_info[0] < 3: - string_types = basestring -else: - string_types = str - -MAXINT = sys.maxsize - -# This object is a stand-in for a logging object created by the -# logging module. PLY will use this by default to create things -# such as the parser.out file. If a user wants more detailed -# information, they can create their own logging object and pass -# it into PLY. - - -class PlyLogger(object): - def __init__(self, f): - self.f = f - - def debug(self, msg, *args, **kwargs): - self.f.write((msg % args) + "\n") - - info = debug - - def warning(self, msg, *args, **kwargs): - self.f.write("WARNING: " + (msg % args) + "\n") - - def error(self, msg, *args, **kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") - - critical = debug - - -# Null logger is used when no output is generated. Does nothing. 
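As the header of the removed `yacc.py` explains, a grammar is handed to PLY as ordinary Python functions whose docstrings carry the BNF. A minimal, hedged sketch of that convention, reusing the hypothetical NUMBER/PLUS tokens from the lexer sketch earlier (rule names are illustrative, not from this codebase):

```python
import ply.yacc as yacc
# Assumes `tokens` and the t_* rules from the lexer sketch are defined in this module.

def p_expression_plus(p):
    "expression : expression PLUS term"
    p[0] = p[1] + p[3]                # p[] indexes the YaccProduction slice

def p_expression_term(p):
    "expression : term"
    p[0] = p[1]

def p_term_number(p):
    "term : NUMBER"
    p[0] = p[1]

def p_error(p):
    print("Syntax error at", p)

parser = yacc.yacc()                  # builds the LALR(1) tables from the p_* rules above
print(parser.parse("1 + 2 + 3"))      # -> 6
```

In this repository the real grammar lives in pycparser's `c_parser.py`, which is what these tables are ultimately built for.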
-class NullLogger(object): - def __getattribute__(self, name): - return self - - def __call__(self, *args, **kwargs): - return self - - -# Exception raised for yacc-related errors -class YaccError(Exception): - pass - - -# Format the result message that the parser produces when running in debug mode. -def format_result(r): - repr_str = repr(r) - if "\n" in repr_str: - repr_str = repr(repr_str) - if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit] + " ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), repr_str) - return result - - -# Format stack entries when the parser is running in debug mode -def format_stack_entry(r): - repr_str = repr(r) - if "\n" in repr_str: - repr_str = repr(repr_str) - if len(repr_str) < 16: - return repr_str - else: - return "<%s @ 0x%x>" % (type(r).__name__, id(r)) - - -# Panic mode error recovery support. This feature is being reworked--much of the -# code here is to offer a deprecation/backwards compatible transition - -_errok = None -_token = None -_restart = None -_warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error(). -Instead, invoke the methods on the associated parser instance: - - def p_error(p): - ... - # Use parser.errok(), parser.token(), parser.restart() - ... - - parser = yacc.yacc() -""" - - -def errok(): - warnings.warn(_warnmsg) - return _errok() - - -def restart(): - warnings.warn(_warnmsg) - return _restart() - - -def token(): - warnings.warn(_warnmsg) - return _token() - - -# Utility function to call the p_error() function with some deprecation hacks -def call_errorfunc(errorfunc, token, parser): - global _errok, _token, _restart - _errok = parser.errok - _token = parser.token - _restart = parser.restart - r = errorfunc(token) - try: - del _errok, _token, _restart - except NameError: - pass - return r - - -# ----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -# ----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - - -class YaccSymbol: - def __str__(self): - return self.type - - def __repr__(self): - return str(self) - - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
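The deprecation warning above spells out the preferred recovery style: call `errok()`, `token()`, and `restart()` on the parser instance rather than through the old module-level helpers. A hedged sketch of a `p_error` written that way, assuming `parser` is the object returned by `yacc.yacc()` and `SEMI` is a hypothetical statement-terminator token:

```python
def p_error(p):
    if p is None:
        print("Unexpected end of input")
        return
    print("Syntax error at token", p.type)
    # Discard tokens until a likely statement boundary, then tell the parser
    # that recovery succeeded -- the instance-method style recommended above.
    while True:
        tok = parser.token()          # pull the next token from the underlying lexer
        if not tok or tok.type == "SEMI":
            break
    parser.errok()
    return tok                        # the returned token becomes the new lookahead
```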
- - -class YaccProduction: - def __init__(self, s, stack=None): - self.slice = s - self.stack = stack - self.lexer = None - self.parser = None - - def __getitem__(self, n): - if isinstance(n, slice): - return [s.value for s in self.slice[n]] - elif n >= 0: - return self.slice[n].value - else: - return self.stack[n].value - - def __setitem__(self, n, v): - self.slice[n].value = v - - def __getslice__(self, i, j): - return [s.value for s in self.slice[i:j]] - - def __len__(self): - return len(self.slice) - - def lineno(self, n): - return getattr(self.slice[n], "lineno", 0) - - def set_lineno(self, n, lineno): - self.slice[n].lineno = lineno - - def linespan(self, n): - startline = getattr(self.slice[n], "lineno", 0) - endline = getattr(self.slice[n], "endlineno", startline) - return startline, endline - - def lexpos(self, n): - return getattr(self.slice[n], "lexpos", 0) - - def lexspan(self, n): - startpos = getattr(self.slice[n], "lexpos", 0) - endpos = getattr(self.slice[n], "endlexpos", startpos) - return startpos, endpos - - def error(self): - raise SyntaxError - - -# ----------------------------------------------------------------------------- -# == LRParser == -# -# The LR Parsing engine. -# ----------------------------------------------------------------------------- - - -class LRParser: - def __init__(self, lrtab, errorf): - self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf - self.set_defaulted_states() - self.errorok = True - - def errok(self): - self.errorok = True - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = "$end" - self.symstack.append(sym) - self.statestack.append(0) - - # Defaulted state support. - # This method identifies parser states where there is only one possible reduction action. - # For such states, the parser can make a choose to make a rule reduction without consuming - # the next look-ahead token. This delayed invocation of the tokenizer can be useful in - # certain kinds of advanced parsing situations where the lexer and parser interact with - # each other or change states (i.e., manipulation of scope, lexer states, etc.). - # - # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions - def set_defaulted_states(self): - self.defaulted_states = {} - for state, actions in self.action.items(): - rules = list(actions.values()) - if len(rules) == 1 and rules[0] < 0: - self.defaulted_states[state] = rules[0] - - def disable_defaulted_states(self): - self.defaulted_states = {} - - def parse( - self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None - ): - if debug or yaccdevel: - if isinstance(debug, int): - debug = PlyLogger(sys.stderr) - return self.parsedebug(input, lexer, debug, tracking, tokenfunc) - elif tracking: - return self.parseopt(input, lexer, debug, tracking, tokenfunc) - else: - return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parsedebug(). - # - # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. Optimized versions of this function - # are automatically created by the ply/ygen.py script. This script cuts out - # sections enclosed in markers such as this: - # - # #--! DEBUG - # statements - # #--! DEBUG - # - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
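`parse()` above dispatches to `parsedebug()`, `parseopt()`, or `parseopt_notrack()` depending on the `debug` and `tracking` arguments, and the `YaccProduction` accessors only carry meaningful positions when tracking is enabled. A small sketch of a rule using them; the `ID`/`EQUALS` symbols and the `data` string are assumptions for illustration, not part of this codebase:

```python
def p_assignment(p):
    "assignment : ID EQUALS expression"
    start, end = p.linespan(3)        # (startline, endline) of the right-hand side
    print("assignment to %s on line %d, rhs spans lines %d-%d"
          % (p[1], p.lineno(1), start, end))
    p[0] = (p[1], p[3])

# tracking=True makes the engine propagate line/position info to nonterminals;
# debug=True would route through parsedebug() and log every shift/reduce.
result = parser.parse(data, tracking=True)
```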
- - def parsedebug( - self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None - ): - # --! parsedebug-start - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = ( - self.action - ) # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = ( - self.productions - ) # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . import lex - - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set the parser() token method (sometimes used in error recovery) - self.token = get_token - - # Set up the state and symbol stacks - - statestack = [] # Stack of parsing states - self.statestack = statestack - symstack = [] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = "$end" - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - # --! DEBUG - debug.debug("") - debug.debug("State : %s", state) - # --! DEBUG - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - # --! DEBUG - debug.debug("Defaulted state %s: Reduce using %d", state, -t) - # --! DEBUG - - # --! DEBUG - debug.debug( - "Stack : %s", - ( - "%s . %s" - % (" ".join([xx.type for xx in symstack][1:]), str(lookahead)) - ).lstrip(), - ) - # --! DEBUG - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - # --! DEBUG - if plen: - debug.info( - "Action : Reduce rule [%s] with %s and goto state %d", - p.str, - "[" - + ",".join( - [ - format_stack_entry(_v.value) - for _v in symstack[-plen:] - ] - ) - + "]", - goto[statestack[-1 - plen]][pname], - ) - else: - debug.info( - "Action : Reduce rule [%s] with %s and goto state %d", - p.str, - [], - goto[statestack[-1]][pname], - ) - - # --! 
DEBUG - - if plen: - targ = symstack[-plen - 1 :] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, "endlineno", t1.lineno) - sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos) - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - self.state = state - p.callable(pslice) - del statestack[-plen:] - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append( - lookahead - ) # Save the current lookahead token - symstack.extend( - targ[1:-1] - ) # Put the production slice back on the stack - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = "error" - sym.value = "error" - lookahead = sym - errorcount = error_count - self.errorok = False - - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - self.state = state - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append( - lookahead - ) # Save the current lookahead token - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = "error" - sym.value = "error" - lookahead = sym - errorcount = error_count - self.errorok = False - - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n, "value", None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG - return result - - if t is None: - - # --! DEBUG - debug.error( - "Error : %s", - ( - "%s . %s" - % (" ".join([xx.type for xx in symstack][1:]), str(lookahead)) - ).lstrip(), - ) - # --! DEBUG - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = False - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! 
- if self.errorfunc: - if errtoken and not hasattr(errtoken, "lexer"): - errtoken.lexer = lexer - self.state = state - tok = call_errorfunc(self.errorfunc, errtoken, self) - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken, "lineno"): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write( - "yacc: Syntax error at line %d, token=%s\n" - % (lineno, errtoken.type) - ) - else: - sys.stderr.write( - "yacc: Syntax error, token=%s" % errtoken.type - ) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != "error": - sym = symstack[-1] - if sym.type == "error": - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - # --! TRACKING - if tracking: - sym.endlineno = getattr(lookahead, "lineno", sym.lineno) - sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos) - # --! TRACKING - lookahead = None - continue - - # Create the error symbol for the first time and make it the new lookahead symbol - t = YaccSymbol() - t.type = "error" - - if hasattr(lookahead, "lineno"): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, "lexpos"): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - sym = symstack.pop() - # --! TRACKING - if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos - # --! TRACKING - statestack.pop() - state = statestack[-1] - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # --! parsedebug-end - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt(). - # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! - # This code is automatically generated by the ply/ygen.py script. Make - # changes to the parsedebug() method instead. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt( - self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None - ): - # --! parseopt-start - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = ( - self.action - ) # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = ( - self.productions - ) # Local reference to production list (to avoid lookup on self.) 
- defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . import lex - - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set the parser() token method (sometimes used in error recovery) - self.token = get_token - - # Set up the state and symbol stacks - - statestack = [] # Stack of parsing states - self.statestack = statestack - symstack = [] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = "$end" - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen - 1 :] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1, "endlineno", t1.lineno) - sym.endlexpos = getattr(t1, "endlexpos", t1.lexpos) - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - self.state = state - p.callable(pslice) - del statestack[-plen:] - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append( - lookahead - ) # Save the current lookahead token - symstack.extend( - targ[1:-1] - ) # Put the production slice back on the stack - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = "error" - sym.value = "error" - lookahead = sym - errorcount = error_count - self.errorok = False - - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! 
TRACKING - - targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - self.state = state - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append( - lookahead - ) # Save the current lookahead token - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = "error" - sym.value = "error" - lookahead = sym - errorcount = error_count - self.errorok = False - - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n, "value", None) - return result - - if t is None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = False - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! - if self.errorfunc: - if errtoken and not hasattr(errtoken, "lexer"): - errtoken.lexer = lexer - self.state = state - tok = call_errorfunc(self.errorfunc, errtoken, self) - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken, "lineno"): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write( - "yacc: Syntax error at line %d, token=%s\n" - % (lineno, errtoken.type) - ) - else: - sys.stderr.write( - "yacc: Syntax error, token=%s" % errtoken.type - ) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != "error": - sym = symstack[-1] - if sym.type == "error": - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - # --! TRACKING - if tracking: - sym.endlineno = getattr(lookahead, "lineno", sym.lineno) - sym.endlexpos = getattr(lookahead, "lexpos", sym.lexpos) - # --! 
TRACKING - lookahead = None - continue - - # Create the error symbol for the first time and make it the new lookahead symbol - t = YaccSymbol() - t.type = "error" - - if hasattr(lookahead, "lineno"): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, "lexpos"): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - sym = symstack.pop() - # --! TRACKING - if tracking: - lookahead.lineno = sym.lineno - lookahead.lexpos = sym.lexpos - # --! TRACKING - statestack.pop() - state = statestack[-1] - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # --! parseopt-end - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt_notrack(). - # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated - # by the ply/ygen.py script. Make changes to the parsedebug() method instead. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt_notrack( - self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None - ): - # --! parseopt-notrack-start - lookahead = None # Current lookahead symbol - lookaheadstack = [] # Stack of lookahead symbols - actions = ( - self.action - ) # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = ( - self.productions - ) # Local reference to production list (to avoid lookup on self.) - defaulted_states = self.defaulted_states # Local reference to defaulted states - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - from . import lex - - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set the parser() token method (sometimes used in error recovery) - self.token = get_token - - # Set up the state and symbol stacks - - statestack = [] # Stack of parsing states - self.statestack = statestack - symstack = [] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = "$end" - symstack.append(sym) - state = 0 - while True: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. 
Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if state not in defaulted_states: - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - else: - t = defaulted_states[state] - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: - errorcount -= 1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen - 1 :] - targ[0] = sym - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - self.state = state - p.callable(pslice) - del statestack[-plen:] - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append( - lookahead - ) # Save the current lookahead token - symstack.extend( - targ[1:-1] - ) # Put the production slice back on the stack - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = "error" - sym.value = "error" - lookahead = sym - errorcount = error_count - self.errorok = False - - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - targ = [sym] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - self.state = state - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append( - lookahead - ) # Save the current lookahead token - statestack.pop() # Pop back one state (before the reduce) - state = statestack[-1] - sym.type = "error" - sym.value = "error" - lookahead = sym - errorcount = error_count - self.errorok = False - - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - if t == 0: - n = symstack[-1] - result = getattr(n, "value", None) - return result - - if t is None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = False - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! 
- if self.errorfunc: - if errtoken and not hasattr(errtoken, "lexer"): - errtoken.lexer = lexer - self.state = state - tok = call_errorfunc(self.errorfunc, errtoken, self) - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken, "lineno"): - lineno = lookahead.lineno - else: - lineno = 0 - if lineno: - sys.stderr.write( - "yacc: Syntax error at line %d, token=%s\n" - % (lineno, errtoken.type) - ) - else: - sys.stderr.write( - "yacc: Syntax error, token=%s" % errtoken.type - ) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != "error": - sym = symstack[-1] - if sym.type == "error": - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - - # Create the error symbol for the first time and make it the new lookahead symbol - t = YaccSymbol() - t.type = "error" - - if hasattr(lookahead, "lineno"): - t.lineno = t.endlineno = lookahead.lineno - if hasattr(lookahead, "lexpos"): - t.lexpos = t.endlexpos = lookahead.lexpos - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - sym = symstack.pop() - statestack.pop() - state = statestack[-1] - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # --! parseopt-notrack-end - - -# ----------------------------------------------------------------------------- -# === Grammar Representation === -# -# The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. -# ----------------------------------------------------------------------------- - -# regex matching identifiers -_is_identifier = re.compile(r"^[a-zA-Z0-9_-]+$") - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# A grammar rule refers to a specification such as this: -# -# expr : expr PLUS term -# -# Here are the basic attributes defined on all productions -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','PLUS','term'] -# prec - Production precedence level -# number - Production number. -# func - Function that executes on reduce -# file - File where production function is defined -# lineno - Line number where production function is defined -# -# The following attributes are defined or optional. 
-# -# len - Length of the production (number of symbols on right hand side) -# usyms - Set of unique symbols found in the production -# ----------------------------------------------------------------------------- - - -class Production(object): - reduced = 0 - - def __init__( - self, number, name, prod, precedence=("right", 0), func=None, file="", line=0 - ): - self.name = name - self.prod = tuple(prod) - self.number = number - self.func = func - self.callable = None - self.file = file - self.line = line - self.prec = precedence - - # Internal settings used during table construction - - self.len = len(self.prod) # Length of the production - - # Create a list of unique production symbols used in the production - self.usyms = [] - for s in self.prod: - if s not in self.usyms: - self.usyms.append(s) - - # List of all LR items for the production - self.lr_items = [] - self.lr_next = None - - # Create a string representation - if self.prod: - self.str = "%s -> %s" % (self.name, " ".join(self.prod)) - else: - self.str = "%s -> " % self.name - - def __str__(self): - return self.str - - def __repr__(self): - return "Production(" + str(self) + ")" - - def __len__(self): - return len(self.prod) - - def __nonzero__(self): - return 1 - - def __getitem__(self, index): - return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self, n): - if n > len(self.prod): - return None - p = LRItem(self, n) - # Precompute the list of productions immediately following. - try: - p.lr_after = Prodnames[p.prod[n + 1]] - except (IndexError, KeyError): - p.lr_after = [] - try: - p.lr_before = p.prod[n - 1] - except IndexError: - p.lr_before = None - return p - - # Bind the production function name to a callable - def bind(self, pdict): - if self.func: - self.callable = pdict[self.func] - - -# This class serves as a minimal standin for Production objects when -# reading table data from files. It only contains information -# actually used by the LR parsing engine, plus some additional -# debugging information. -class MiniProduction(object): - def __init__(self, str, name, len, func, file, line): - self.name = name - self.len = len - self.func = func - self.callable = None - self.file = file - self.line = line - self.str = str - - def __str__(self): - return self.str - - def __repr__(self): - return "MiniProduction(%s)" % self.str - - # Bind the production function name to a callable - def bind(self, pdict): - if self.func: - self.callable = pdict[self.func] - - -# ----------------------------------------------------------------------------- -# class LRItem -# -# This class represents a specific stage of parsing a production rule. For -# example: -# -# expr : expr . PLUS term -# -# In the above, the "." represents the current location of the parse. Here -# basic attributes: -# -# name - Name of the production. For example 'expr' -# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] -# number - Production number. -# -# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' -# then lr_next refers to 'expr -> expr PLUS . term' -# lr_index - LR item index (location of the ".") in the prod list. 
-# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# lr_after - List of all productions that immediately follow -# lr_before - Grammar symbol immediately before -# ----------------------------------------------------------------------------- - - -class LRItem(object): - def __init__(self, p, n): - self.name = p.name - self.prod = list(p.prod) - self.number = p.number - self.lr_index = n - self.lookaheads = {} - self.prod.insert(n, ".") - self.prod = tuple(self.prod) - self.len = len(self.prod) - self.usyms = p.usyms - - def __str__(self): - if self.prod: - s = "%s -> %s" % (self.name, " ".join(self.prod)) - else: - s = "%s -> " % self.name - return s - - def __repr__(self): - return "LRItem(" + str(self) + ")" - - -# ----------------------------------------------------------------------------- -# rightmost_terminal() -# -# Return the rightmost terminal from a list of symbols. Used in add_production() -# ----------------------------------------------------------------------------- -def rightmost_terminal(symbols, terminals): - i = len(symbols) - 1 - while i >= 0: - if symbols[i] in terminals: - return symbols[i] - i -= 1 - return None - - -# ----------------------------------------------------------------------------- -# === GRAMMAR CLASS === -# -# The following class represents the contents of the specified grammar along -# with various computed properties such as first sets, follow sets, LR items, etc. -# This data is used for critical parts of the table generation process later. -# ----------------------------------------------------------------------------- - - -class GrammarError(YaccError): - pass - - -class Grammar(object): - def __init__(self, terminals): - self.Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - self.Prodnames = ( - {} - ) # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - self.Prodmap = {} # A dictionary that is only used to detect duplicate - # productions. - - self.Terminals = {} # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - for term in terminals: - self.Terminals[term] = [] - - self.Terminals["error"] = [] - - self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - self.First = {} # A dictionary of precomputed FIRST(x) symbols - - self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - - self.Precedence = ( - {} - ) # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - self.UsedPrecedence = ( - set() - ) # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. - - self.Start = None # Starting symbol for the grammar - - def __len__(self): - return len(self.Productions) - - def __getitem__(self, index): - return self.Productions[index] - - # ----------------------------------------------------------------------------- - # set_precedence() - # - # Sets the precedence for a given terminal. assoc is the associativity such as - # 'left','right', or 'nonassoc'. level is a numeric level. 
- # - # ----------------------------------------------------------------------------- - - def set_precedence(self, term, assoc, level): - assert self.Productions == [ - None - ], "Must call set_precedence() before add_production()" - if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal %r" % term) - if assoc not in ["left", "right", "nonassoc"]: - raise GrammarError( - "Associativity must be one of 'left','right', or 'nonassoc'" - ) - self.Precedence[term] = (assoc, level) - - # ----------------------------------------------------------------------------- - # add_production() - # - # Given an action function, this function assembles a production rule and - # computes its precedence level. - # - # The production rule is supplied as a list of symbols. For example, - # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and - # symbols ['expr','PLUS','term']. - # - # Precedence is determined by the precedence of the right-most non-terminal - # or the precedence of a terminal specified by %prec. - # - # A variety of error checks are performed to make sure production symbols - # are valid and that %prec is used correctly. - # ----------------------------------------------------------------------------- - - def add_production(self, prodname, syms, func=None, file="", line=0): - - if prodname in self.Terminals: - raise GrammarError( - "%s:%d: Illegal rule name %r. Already defined as a token" - % (file, line, prodname) - ) - if prodname == "error": - raise GrammarError( - "%s:%d: Illegal rule name %r. error is a reserved word" - % (file, line, prodname) - ) - if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name %r" % (file, line, prodname)) - - # Look for literal tokens - for n, s in enumerate(syms): - if s[0] in "'\"": - try: - c = eval(s) - if len(c) > 1: - raise GrammarError( - "%s:%d: Literal token %s in rule %r may only be a single character" - % (file, line, s, prodname) - ) - if c not in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != "%prec": - raise GrammarError( - "%s:%d: Illegal name %r in rule %r" % (file, line, s, prodname) - ) - - # Determine the precedence level - if "%prec" in syms: - if syms[-1] == "%prec": - raise GrammarError( - "%s:%d: Syntax error. Nothing follows %%prec" % (file, line) - ) - if syms[-2] != "%prec": - raise GrammarError( - "%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" - % (file, line) - ) - precname = syms[-1] - prodprec = self.Precedence.get(precname) - if not prodprec: - raise GrammarError( - "%s:%d: Nothing known about the precedence of %r" - % (file, line, precname) - ) - else: - self.UsedPrecedence.add(precname) - del syms[-2:] # Drop %prec from the rule - else: - # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms, self.Terminals) - prodprec = self.Precedence.get(precname, ("right", 0)) - - # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname, syms) - if map in self.Prodmap: - m = self.Prodmap[map] - raise GrammarError( - "%s:%d: Duplicate rule %s. " % (file, line, m) - + "Previous definition at %s:%d" % (m.file, m.line) - ) - - # From this point on, everything is valid. 
Create a new Production instance - pnumber = len(self.Productions) - if prodname not in self.Nonterminals: - self.Nonterminals[prodname] = [] - - # Add the production number to Terminals and Nonterminals - for t in syms: - if t in self.Terminals: - self.Terminals[t].append(pnumber) - else: - if t not in self.Nonterminals: - self.Nonterminals[t] = [] - self.Nonterminals[t].append(pnumber) - - # Create a production and add it to the list of productions - p = Production(pnumber, prodname, syms, prodprec, func, file, line) - self.Productions.append(p) - self.Prodmap[map] = p - - # Add to the global productions list - try: - self.Prodnames[prodname].append(p) - except KeyError: - self.Prodnames[prodname] = [p] - - # ----------------------------------------------------------------------------- - # set_start() - # - # Sets the starting symbol and creates the augmented grammar. Production - # rule 0 is S' -> start where start is the start symbol. - # ----------------------------------------------------------------------------- - - def set_start(self, start=None): - if not start: - start = self.Productions[1].name - if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0, "S'", [start]) - self.Nonterminals[start].append(0) - self.Start = start - - # ----------------------------------------------------------------------------- - # find_unreachable() - # - # Find all of the nonterminal symbols that can't be reached from the starting - # symbol. Returns a list of nonterminals that can't be reached. - # ----------------------------------------------------------------------------- - - def find_unreachable(self): - - # Mark all symbols that are reachable from a symbol s - def mark_reachable_from(s): - if s in reachable: - return - reachable.add(s) - for p in self.Prodnames.get(s, []): - for r in p.prod: - mark_reachable_from(r) - - reachable = set() - mark_reachable_from(self.Productions[0].prod[0]) - return [s for s in self.Nonterminals if s not in reachable] - - # ----------------------------------------------------------------------------- - # infinite_cycles() - # - # This function looks at the various parsing rules and tries to detect - # infinite recursion cycles (grammar rules where there is no possible way - # to derive a string of only terminals). - # ----------------------------------------------------------------------------- - - def infinite_cycles(self): - terminates = {} - - # Terminals: - for t in self.Terminals: - terminates[t] = True - - terminates["$end"] = True - - # Nonterminals: - - # Initialize to false: - for n in self.Nonterminals: - terminates[n] = False - - # Then propagate termination until no change: - while True: - some_change = False - for (n, pl) in self.Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = False - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = True - - if p_terminates: - # symbol n terminates! - if not terminates[n]: - terminates[n] = True - some_change = True - # Don't need to consider any more productions for this n. 
- break - - if not some_change: - break - - infinite = [] - for (s, term) in terminates.items(): - if not term: - if s not in self.Prodnames and s not in self.Terminals and s != "error": - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - infinite.append(s) - - return infinite - - # ----------------------------------------------------------------------------- - # undefined_symbols() - # - # Find all symbols that were used the grammar, but not defined as tokens or - # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. - # ----------------------------------------------------------------------------- - def undefined_symbols(self): - result = [] - for p in self.Productions: - if not p: - continue - - for s in p.prod: - if s not in self.Prodnames and s not in self.Terminals and s != "error": - result.append((s, p)) - return result - - # ----------------------------------------------------------------------------- - # unused_terminals() - # - # Find all terminals that were defined, but not used by the grammar. Returns - # a list of all symbols. - # ----------------------------------------------------------------------------- - def unused_terminals(self): - unused_tok = [] - for s, v in self.Terminals.items(): - if s != "error" and not v: - unused_tok.append(s) - - return unused_tok - - # ------------------------------------------------------------------------------ - # unused_rules() - # - # Find all grammar rules that were defined, but not used (maybe not reachable) - # Returns a list of productions. - # ------------------------------------------------------------------------------ - - def unused_rules(self): - unused_prod = [] - for s, v in self.Nonterminals.items(): - if not v: - p = self.Prodnames[s][0] - unused_prod.append(p) - return unused_prod - - # ----------------------------------------------------------------------------- - # unused_precedence() - # - # Returns a list of tuples (term,precedence) corresponding to precedence - # rules that were never used by the grammar. term is the name of the terminal - # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. - # ----------------------------------------------------------------------------- - - def unused_precedence(self): - unused = [] - for termname in self.Precedence: - if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname, self.Precedence[termname][0])) - - return unused - - # ------------------------------------------------------------------------- - # _first() - # - # Compute the value of FIRST1(beta) where beta is a tuple of symbols. - # - # During execution of compute_first1, the result may be incomplete. - # Afterward (e.g., when called from compute_follow()), it will be complete. - # ------------------------------------------------------------------------- - def _first(self, beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [] - for x in beta: - x_produces_empty = False - - # Add all the non- symbols of First[x] to the result. - for f in self.First[x]: - if f == "": - x_produces_empty = True - else: - if f not in result: - result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. 
- break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append("") - - return result - - # ------------------------------------------------------------------------- - # compute_first() - # - # Compute the value of FIRST1(X) for all symbols - # ------------------------------------------------------------------------- - def compute_first(self): - if self.First: - return self.First - - # Terminals: - for t in self.Terminals: - self.First[t] = [t] - - self.First["$end"] = ["$end"] - - # Nonterminals: - - # Initialize to the empty set: - for n in self.Nonterminals: - self.First[n] = [] - - # Then propagate symbols until no change: - while True: - some_change = False - for n in self.Nonterminals: - for p in self.Prodnames[n]: - for f in self._first(p.prod): - if f not in self.First[n]: - self.First[n].append(f) - some_change = True - if not some_change: - break - - return self.First - - # --------------------------------------------------------------------- - # compute_follow() - # - # Computes all of the follow sets for every non-terminal symbol. The - # follow set is the set of all symbols that might follow a given - # non-terminal. See the Dragon book, 2nd Ed. p. 189. - # --------------------------------------------------------------------- - def compute_follow(self, start=None): - # If already computed, return the result - if self.Follow: - return self.Follow - - # If first sets not computed yet, do that first. - if not self.First: - self.compute_first() - - # Add '$end' to the follow list of the start symbol - for k in self.Nonterminals: - self.Follow[k] = [] - - if not start: - start = self.Productions[1].name - - self.Follow[start] = ["$end"] - - while True: - didadd = False - for p in self.Productions[1:]: - # Here is the production set - for i, B in enumerate(p.prod): - if B in self.Nonterminals: - # Okay. We got a non-terminal in a production - fst = self._first(p.prod[i + 1 :]) - hasempty = False - for f in fst: - if f != "" and f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = True - if f == "": - hasempty = True - if hasempty or i == (len(p.prod) - 1): - # Add elements of follow(a) to follow(b) - for f in self.Follow[p.name]: - if f not in self.Follow[B]: - self.Follow[B].append(f) - didadd = True - if not didadd: - break - return self.Follow - - # ----------------------------------------------------------------------------- - # build_lritems() - # - # This function walks the list of productions and builds a complete set of the - # LR items. The LR items are stored in two ways: First, they are uniquely - # numbered and placed in the list _lritems. Second, a linked list of LR items - # is built for each production. For example: - # - # E -> E PLUS E - # - # Creates the list - # - # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] - # ----------------------------------------------------------------------------- - - def build_lritems(self): - for p in self.Productions: - lastlri = p - i = 0 - lr_items = [] - while True: - if i > len(p): - lri = None - else: - lri = LRItem(p, i) - # Precompute the list of productions immediately following - try: - lri.lr_after = self.Prodnames[lri.prod[i + 1]] - except (IndexError, KeyError): - lri.lr_after = [] - try: - lri.lr_before = lri.prod[i - 1] - except IndexError: - lri.lr_before = None - - lastlri.lr_next = lri - if not lri: - break - lr_items.append(lri) - lastlri = lri - i += 1 - p.lr_items = lr_items - - -# ----------------------------------------------------------------------------- -# == Class LRTable == -# -# This basic class represents a basic table of LR parsing information. -# Methods for generating the tables are not defined here. They are defined -# in the derived class LRGeneratedTable. -# ----------------------------------------------------------------------------- - - -class VersionError(YaccError): - pass - - -class LRTable(object): - def __init__(self): - self.lr_action = None - self.lr_goto = None - self.lr_productions = None - self.lr_method = None - - def read_table(self, module): - if isinstance(module, types.ModuleType): - parsetab = module - else: - exec("import %s" % module) - parsetab = sys.modules[module] - - if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - - self.lr_action = parsetab._lr_action - self.lr_goto = parsetab._lr_goto - - self.lr_productions = [] - for p in parsetab._lr_productions: - self.lr_productions.append(MiniProduction(*p)) - - self.lr_method = parsetab._lr_method - return parsetab._lr_signature - - def read_pickle(self, filename): - try: - import cPickle as pickle - except ImportError: - import pickle - - if not os.path.exists(filename): - raise ImportError - - in_f = open(filename, "rb") - - tabversion = pickle.load(in_f) - if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - self.lr_method = pickle.load(in_f) - signature = pickle.load(in_f) - self.lr_action = pickle.load(in_f) - self.lr_goto = pickle.load(in_f) - productions = pickle.load(in_f) - - self.lr_productions = [] - for p in productions: - self.lr_productions.append(MiniProduction(*p)) - - in_f.close() - return signature - - # Bind all production function names to callable objects in pdict - def bind_callables(self, pdict): - for p in self.lr_productions: - p.bind(pdict) - - -# ----------------------------------------------------------------------------- -# === LR Generator === -# -# The following classes and functions are used to generate LR parsing tables on -# a grammar. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# digraph() -# traverse() -# -# The following two functions are used to compute set valued functions -# of the form: -# -# F(x) = F'(x) U U{F(y) | x R y} -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. 
-# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ - - -def digraph(X, R, FP): - N = {} - for x in X: - N[x] = 0 - stack = [] - F = {} - for x in X: - if N[x] == 0: - traverse(x, N, stack, F, X, R, FP) - return F - - -def traverse(x, N, stack, F, X, R, FP): - stack.append(x) - d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y, N, stack, F, X, R, FP) - N[x] = min(N[x], N[y]) - for a in F.get(y, []): - if a not in F[x]: - F[x].append(a) - if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - - -class LALRError(YaccError): - pass - - -# ----------------------------------------------------------------------------- -# == LRGeneratedTable == -# -# This class implements the LR table generation algorithm. There are no -# public methods except for write() -# ----------------------------------------------------------------------------- - - -class LRGeneratedTable(LRTable): - def __init__(self, grammar, method="LALR", log=None): - if method not in ["SLR", "LALR"]: - raise LALRError("Unsupported method %s" % method) - - self.grammar = grammar - self.lr_method = method - - # Set up the logger - if not log: - log = NullLogger() - self.log = log - - # Internal attributes - self.lr_action = {} # Action table - self.lr_goto = {} # Goto table - self.lr_productions = grammar.Productions # Copy of grammar Production array - self.lr_goto_cache = {} # Cache of computed gotos - self.lr0_cidhash = {} # Cache of closures - - self._add_count = 0 # Internal counter used to detect cycles - - # Diagonistic information filled in by the table generator - self.sr_conflict = 0 - self.rr_conflict = 0 - self.conflicts = [] # List of conflicts - - self.sr_conflicts = [] - self.rr_conflicts = [] - - # Build the tables - self.grammar.build_lritems() - self.grammar.compute_first() - self.grammar.compute_follow() - self.lr_parse_table() - - # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - - def lr0_closure(self, I): - self._add_count += 1 - - # Add everything in I to J - J = I[:] - didadd = True - while didadd: - didadd = False - for j in J: - for x in j.lr_after: - if getattr(x, "lr0_added", 0) == self._add_count: - continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = self._add_count - didadd = True - - return J - - # Compute the LR(0) goto function goto(I,X) where I is a set - # of LR(0) items and X is a grammar symbol. This function is written - # in a way that guarantees uniqueness of the generated goto sets - # (i.e. the same goto set will never be returned as two different Python - # objects). With uniqueness, we can later do fast set comparisons using - # id(obj) instead of element-wise comparison. 
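# Illustrative sketch (not from the PLY source being removed): what digraph()
# above computes.  For a relation R and a seed function FP, the result is
# F(x) = FP(x) U union{ F(y) | x R y }.  The toy nodes and sets below are made
# up for illustration; a naive iteration reaches the same fixed point.
X = ["A", "B", "C"]
R = {"A": ["B"], "B": ["C"], "C": []}      # x R y edges
FP = {"A": {"a"}, "B": {"b"}, "C": {"c"}}  # seed sets F'(x)
F = {x: set(FP[x]) for x in X}
changed = True
while changed:
    changed = False
    for x in X:
        for y in R[x]:
            if not F[y] <= F[x]:
                F[x] |= F[y]
                changed = True
# F == {"A": {"a", "b", "c"}, "B": {"b", "c"}, "C": {"c"}}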
- - def lr0_goto(self, I, x): - # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I), x)) - if g: - return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = self.lr_goto_cache.get(x) - if not s: - s = {} - self.lr_goto_cache[x] = s - - gs = [] - for p in I: - n = p.lr_next - if n and n.lr_before == x: - s1 = s.get(id(n)) - if not s1: - s1 = {} - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get("$end") - if not g: - if gs: - g = self.lr0_closure(gs) - s["$end"] = g - else: - s["$end"] = gs - self.lr_goto_cache[(id(I), x)] = g - return g - - # Compute the LR(0) sets of item function - def lr0_items(self): - C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] - i = 0 - for I in C: - self.lr0_cidhash[id(I)] = i - i += 1 - - # Loop over the items in C and each grammar symbols - i = 0 - while i < len(C): - I = C[i] - i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = {} - for ii in I: - for s in ii.usyms: - asyms[s] = None - - for x in asyms: - g = self.lr0_goto(I, x) - if not g or id(g) in self.lr0_cidhash: - continue - self.lr0_cidhash[id(g)] = len(C) - C.append(g) - - return C - - # ----------------------------------------------------------------------------- - # ==== LALR(1) Parsing ==== - # - # LALR(1) parsing is almost exactly the same as SLR except that instead of - # relying upon Follow() sets when performing reductions, a more selective - # lookahead set that incorporates the state of the LR(0) machine is utilized. - # Thus, we mainly just have to focus on calculating the lookahead sets. - # - # The method used here is due to DeRemer and Pennelo (1982). - # - # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) - # Lookahead Sets", ACM Transactions on Programming Languages and Systems, - # Vol. 4, No. 4, Oct. 1982, pp. 615-649 - # - # Further details can also be found in: - # - # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", - # McGraw-Hill Book Company, (1985). - # - # ----------------------------------------------------------------------------- - - # ----------------------------------------------------------------------------- - # compute_nullable_nonterminals() - # - # Creates a dictionary containing all of the non-terminals that might produce - # an empty production. - # ----------------------------------------------------------------------------- - - def compute_nullable_nonterminals(self): - nullable = set() - num_nullable = 0 - while True: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable.add(p.name) - continue - for t in p.prod: - if t not in nullable: - break - else: - nullable.add(p.name) - if len(nullable) == num_nullable: - break - num_nullable = len(nullable) - return nullable - - # ----------------------------------------------------------------------------- - # find_nonterminal_trans(C) - # - # Given a set of LR(0) items, this functions finds all of the non-terminal - # transitions. These are transitions in which a dot appears immediately before - # a non-terminal. Returns a list of tuples of the form (state,N) where state - # is the state number and N is the nonterminal symbol. - # - # The input C is the set of LR(0) items. 
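# Illustrative sketch (not from the PLY source being removed): the fixed-point
# idea behind compute_nullable_nonterminals() above.  The toy grammar is made
# up for illustration: A -> '' | 'a' ; B -> A A ; C -> 'c'.
productions = [("A", []), ("A", ["a"]), ("B", ["A", "A"]), ("C", ["c"])]
nullable = set()
while True:
    before = len(nullable)
    for name, rhs in productions:
        if all(sym in nullable for sym in rhs):  # an empty rhs is trivially nullable
            nullable.add(name)
    if len(nullable) == before:
        break
# nullable == {"A", "B"}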
- # ----------------------------------------------------------------------------- - - def find_nonterminal_transitions(self, C): - trans = [] - for stateno, state in enumerate(C): - for p in state: - if p.lr_index < p.len - 1: - t = (stateno, p.prod[p.lr_index + 1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: - trans.append(t) - return trans - - # ----------------------------------------------------------------------------- - # dr_relation() - # - # Computes the DR(p,A) relationships for non-terminal transitions. The input - # is a tuple (state,N) where state is a number and N is a nonterminal symbol. - # - # Returns a list of terminals. - # ----------------------------------------------------------------------------- - - def dr_relation(self, C, trans, nullable): - dr_set = {} - state, N = trans - terms = [] - - g = self.lr0_goto(C[state], N) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in self.grammar.Terminals: - if a not in terms: - terms.append(a) - - # This extra bit is to handle the start state - if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append("$end") - - return terms - - # ----------------------------------------------------------------------------- - # reads_relation() - # - # Computes the READS() relation (p,A) READS (t,C). - # ----------------------------------------------------------------------------- - - def reads_relation(self, C, trans, empty): - # Look for empty transitions - rel = [] - state, N = trans - - g = self.lr0_goto(C[state], N) - j = self.lr0_cidhash.get(id(g), -1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j, a)) - - return rel - - # ----------------------------------------------------------------------------- - # compute_lookback_includes() - # - # Determines the lookback and includes relations - # - # LOOKBACK: - # - # This relation is determined by running the LR(0) state machine forward. - # For example, starting with a production "N : . A B C", we run it forward - # to obtain "N : A B C ." We then build a relationship between this final - # state and the starting state. These relationships are stored in a dictionary - # lookdict. - # - # INCLUDES: - # - # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). - # - # This relation is used to determine non-terminal transitions that occur - # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) - # if the following holds: - # - # B -> LAT, where T -> epsilon and p' -L-> p - # - # L is essentially a prefix (which may be empty), T is a suffix that must be - # able to derive an empty string. State p' must lead to state p with the string L. - # - # ----------------------------------------------------------------------------- - - def compute_lookback_includes(self, C, trans, nullable): - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 - - # Loop over all transitions and compute lookbacks and includes - for state, N in trans: - lookb = [] - includes = [] - for p in C[state]: - if p.name != N: - continue - - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . 
on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j, t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: - break # No forget it - if p.prod[li] not in nullable: - break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j, t)) - - g = self.lr0_goto(C[j], t) # Go to next set - j = self.lr0_cidhash.get(id(g), -1) # Go to next state - - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: - continue - if r.len != p.len: - continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i + 1]: - break - i = i + 1 - else: - lookb.append((j, r)) - for i in includes: - if i not in includedict: - includedict[i] = [] - includedict[i].append((state, N)) - lookdict[(state, N)] = lookb - - return lookdict, includedict - - # ----------------------------------------------------------------------------- - # compute_read_sets() - # - # Given a set of LR(0) items, this function computes the read sets. - # - # Inputs: C = Set of LR(0) items - # ntrans = Set of nonterminal transitions - # nullable = Set of empty transitions - # - # Returns a set containing the read sets - # ----------------------------------------------------------------------------- - - def compute_read_sets(self, C, ntrans, nullable): - FP = lambda x: self.dr_relation(C, x, nullable) - R = lambda x: self.reads_relation(C, x, nullable) - F = digraph(ntrans, R, FP) - return F - - # ----------------------------------------------------------------------------- - # compute_follow_sets() - # - # Given a set of LR(0) items, a set of non-terminal transitions, a readset, - # and an include set, this function computes the follow sets - # - # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} - # - # Inputs: - # ntrans = Set of nonterminal transitions - # readsets = Readset (previously computed) - # inclsets = Include sets (previously computed) - # - # Returns a set containing the follow sets - # ----------------------------------------------------------------------------- - - def compute_follow_sets(self, ntrans, readsets, inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x, []) - F = digraph(ntrans, R, FP) - return F - - # ----------------------------------------------------------------------------- - # add_lookaheads() - # - # Attaches the lookahead symbols to grammar rules. 
- # - # Inputs: lookbacks - Set of lookback relations - # followset - Computed follow set - # - # This function directly attaches the lookaheads to productions contained - # in the lookbacks set - # ----------------------------------------------------------------------------- - - def add_lookaheads(self, lookbacks, followset): - for trans, lb in lookbacks.items(): - # Loop over productions in lookback - for state, p in lb: - if state not in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans, []) - for a in f: - if a not in p.lookaheads[state]: - p.lookaheads[state].append(a) - - # ----------------------------------------------------------------------------- - # add_lalr_lookaheads() - # - # This function does all of the work of adding lookahead information for use - # with LALR parsing - # ----------------------------------------------------------------------------- - - def add_lalr_lookaheads(self, C): - # Determine all of the nullable nonterminals - nullable = self.compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = self.find_nonterminal_transitions(C) - - # Compute read sets - readsets = self.compute_read_sets(C, trans, nullable) - - # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C, trans, nullable) - - # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans, readsets, included) - - # Add all of the lookaheads - self.add_lookaheads(lookd, followsets) - - # ----------------------------------------------------------------------------- - # lr_parse_table() - # - # This function constructs the parse tables for SLR or LALR - # ----------------------------------------------------------------------------- - def lr_parse_table(self): - Productions = self.grammar.Productions - Precedence = self.grammar.Precedence - goto = self.lr_goto # Goto array - action = self.lr_action # Action array - log = self.log # Logger for output - - actionp = {} # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) - - # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items - # This determines the number of states - - C = self.lr0_items() - - if self.lr_method == "LALR": - self.add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [] # List of actions - st_action = {} - st_actionp = {} - st_goto = {} - log.info("") - log.info("state %d", st) - log.info("") - for p in I: - log.info(" (%d) %s", p.number, p) - log.info("") - - for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p - else: - # We are at the end of a production. Reduce! - if self.lr_method == "LALR": - laheads = p.lookaheads[st] - else: - laheads = self.grammar.Follow[p.name] - for a in laheads: - actlist.append( - (a, p, "reduce using rule %d (%s)" % (p.number, p)) - ) - r = st_action.get(a) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - - # Shift precedence comes from the token - sprec, slevel = Precedence.get(a, ("right", 0)) - - # Reduce precedence comes from rule being reduced (p) - rprec, rlevel = Productions[p.number].prec - - if (slevel < rlevel) or ( - (slevel == rlevel) and (rprec == "left") - ): - # We really need to reduce here. 
- st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info( - " ! shift/reduce conflict for %s resolved as reduce", - a, - ) - self.sr_conflicts.append((st, a, "reduce")) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == "nonassoc"): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info( - " ! shift/reduce conflict for %s resolved as shift", - a, - ) - self.sr_conflicts.append((st, a, "shift")) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp, rejectp = pp, oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp, rejectp = oldp, pp - self.rr_conflicts.append((st, chosenp, rejectp)) - log.info( - " ! reduce/reduce conflict for %s resolved using rule %d (%s)", - a, - st_actionp[a].number, - st_actionp[a], - ) - else: - raise LALRError("Unknown conflict in state %d" % st) - else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i + 1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I, a) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - # We are in a shift state - actlist.append((a, p, "shift and go to state %d" % j)) - r = st_action.get(a) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError( - "Shift/shift conflict in state %d" % st - ) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - - # Shift precedence comes from the token - sprec, slevel = Precedence.get(a, ("right", 0)) - - # Reduce precedence comes from the rule that could have been reduced - rprec, rlevel = Productions[ - st_actionp[a].number - ].prec - - if (slevel > rlevel) or ( - (slevel == rlevel) and (rprec == "right") - ): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info( - " ! shift/reduce conflict for %s resolved as shift", - a, - ) - self.sr_conflicts.append((st, a, "shift")) - elif (slevel == rlevel) and (rprec == "nonassoc"): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info( - " ! shift/reduce conflict for %s resolved as reduce", - a, - ) - self.sr_conflicts.append((st, a, "reduce")) - - else: - raise LALRError("Unknown conflict in state %d" % st) - else: - st_action[a] = j - st_actionp[a] = p - - # Print the actions associated with each terminal - _actprint = {} - for a, p, m in actlist: - if a in st_action: - if p is st_actionp[a]: - log.info(" %-15s %s", a, m) - _actprint[(a, m)] = 1 - log.info("") - # Print the actions that were not used. (debugging) - not_used = 0 - for a, p, m in actlist: - if a in st_action: - if p is not st_actionp[a]: - if not (a, m) in _actprint: - log.debug(" ! 
%-15s [ %s ]", a, m) - not_used = 1 - _actprint[(a, m)] = 1 - if not_used: - log.debug("") - - # Construct the goto table for this state - - nkeys = {} - for ii in I: - for s in ii.usyms: - if s in self.grammar.Nonterminals: - nkeys[s] = None - for n in nkeys: - g = self.lr0_goto(I, n) - j = self.lr0_cidhash.get(id(g), -1) - if j >= 0: - st_goto[n] = j - log.info(" %-30s shift and go to state %d", n, j) - - action[st] = st_action - actionp[st] = st_actionp - goto[st] = st_goto - st += 1 - - # ----------------------------------------------------------------------------- - # write() - # - # This function writes the LR parsing tables to a file - # ----------------------------------------------------------------------------- - - def write_table(self, tabmodule, outputdir="", signature=""): - if isinstance(tabmodule, types.ModuleType): - raise IOError("Won't overwrite existing tabmodule") - - basemodulename = tabmodule.split(".")[-1] - filename = os.path.join(outputdir, basemodulename) + ".py" - try: - f = open(filename, "w") - - f.write( - """ -# %s -# This file is automatically generated. Do not edit. -_tabversion = %r - -_lr_method = %r - -_lr_signature = %r - """ - % ( - os.path.basename(filename), - __tabversion__, - self.lr_method, - signature, - ) - ) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = {} - - for s, nd in self.lr_action.items(): - for name, v in nd.items(): - i = items.get(name) - if not i: - i = ([], []) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k, v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write( - """ -_lr_action = {} -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = {} - _lr_action[_x][_k] = _y -del _lr_action_items -""" - ) - - else: - f.write("\n_lr_action = { ") - for k, v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0], k[1], v)) - f.write("}\n") - - if smaller: - # Factor out names to try and make smaller - items = {} - - for s, nd in self.lr_goto.items(): - for name, v in nd.items(): - i = items.get(name) - if not i: - i = ([], []) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k, v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write( - """ -_lr_goto = {} -for _k, _v in _lr_goto_items.items(): - for _x, _y in zip(_v[0], _v[1]): - if not _x in _lr_goto: _lr_goto[_x] = {} - _lr_goto[_x][_k] = _y -del _lr_goto_items -""" - ) - else: - f.write("\n_lr_goto = { ") - for k, v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0], k[1], v)) - f.write("}\n") - - # Write production table - f.write("_lr_productions = [\n") - for p in self.lr_productions: - if p.func: - f.write( - " (%r,%r,%d,%r,%r,%d),\n" - % ( - p.str, - p.name, - p.len, - p.func, - os.path.basename(p.file), - p.line, - ) - ) - else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p), p.name, p.len)) - f.write("]\n") - f.close() - - except IOError as e: - raise - - # ----------------------------------------------------------------------------- - # pickle_table() - # - # This function pickles the LR parsing tables to a supplied file object - # 
----------------------------------------------------------------------------- - - def pickle_table(self, filename, signature=""): - try: - import cPickle as pickle - except ImportError: - import pickle - with open(filename, "wb") as outf: - pickle.dump(__tabversion__, outf, pickle_protocol) - pickle.dump(self.lr_method, outf, pickle_protocol) - pickle.dump(signature, outf, pickle_protocol) - pickle.dump(self.lr_action, outf, pickle_protocol) - pickle.dump(self.lr_goto, outf, pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append( - (p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line) - ) - else: - outp.append((str(p), p.name, p.len, None, None, None)) - pickle.dump(outp, outf, pickle_protocol) - - -# ----------------------------------------------------------------------------- -# === INTROSPECTION === -# -# The following functions and classes are used to implement the PLY -# introspection features followed by the yacc() function itself. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# get_caller_module_dict() -# -# This function returns a dictionary containing all of the symbols defined within -# a caller further down the call stack. This is used to get the environment -# associated with the yacc() call if none was provided. -# ----------------------------------------------------------------------------- - - -def get_caller_module_dict(levels): - f = sys._getframe(levels) - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - return ldict - - -# ----------------------------------------------------------------------------- -# parse_grammar() -# -# This takes a raw grammar rule string and parses it into production data -# ----------------------------------------------------------------------------- -def parse_grammar(doc, file, line): - grammar = [] - # Split the doc string into lines - pstrings = doc.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: - continue - try: - if p[0] == "|": - # This is a continuation of a previous rule - if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) - prodname = lastp - syms = p[1:] - else: - prodname = p[0] - lastp = prodname - syms = p[2:] - assign = p[1] - if assign != ":" and assign != "::=": - raise SyntaxError( - "%s:%d: Syntax error. Expected ':'" % (file, dline) - ) - - grammar.append((file, dline, prodname, syms)) - except SyntaxError: - raise - except Exception: - raise SyntaxError( - "%s:%d: Syntax error in rule %r" % (file, dline, ps.strip()) - ) - - return grammar - - -# ----------------------------------------------------------------------------- -# ParserReflect() -# -# This class represents information extracted for building a parser including -# start symbol, error function, tokens, precedence list, action functions, -# etc. 
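# Illustrative sketch (not from the PLY source being removed): the docstring
# convention that parse_grammar() above and ParserReflect below work with.
# Each p_* action function carries its grammar rule(s) in its docstring, with
# '|' continuing the previous rule; p[0] receives the semantic value.
def p_expr(p):
    """expr : expr PLUS term
            | term"""
    p[0] = p[1] if len(p) == 2 else p[1] + p[3]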
-# ----------------------------------------------------------------------------- -class ParserReflect(object): - def __init__(self, pdict, log=None): - self.pdict = pdict - self.start = None - self.error_func = None - self.tokens = None - self.modules = set() - self.grammar = [] - self.error = False - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log - - # Get all of the basic information - def get_all(self): - self.get_start() - self.get_error_func() - self.get_tokens() - self.get_precedence() - self.get_pfunctions() - - # Validate all of the information - def validate_all(self): - self.validate_start() - self.validate_error_func() - self.validate_tokens() - self.validate_precedence() - self.validate_pfunctions() - self.validate_modules() - return self.error - - # Compute a signature over the grammar - def signature(self): - parts = [] - try: - if self.start: - parts.append(self.start) - if self.prec: - parts.append("".join(["".join(p) for p in self.prec])) - if self.tokens: - parts.append(" ".join(self.tokens)) - for f in self.pfuncs: - if f[3]: - parts.append(f[3]) - except (TypeError, ValueError): - pass - return "".join(parts) - - # ----------------------------------------------------------------------------- - # validate_modules() - # - # This method checks to see if there are duplicated p_rulename() functions - # in the parser module file. Without this function, it is really easy for - # users to make mistakes by cutting and pasting code fragments (and it's a real - # bugger to try and figure out why the resulting parser doesn't work). Therefore, - # we just do a little regular expression pattern matching of def statements - # to try and detect duplicates. - # ----------------------------------------------------------------------------- - - def validate_modules(self): - # Match def p_funcname( - fre = re.compile(r"\s*def\s+(p_[a-zA-Z_0-9]*)\(") - - for module in self.modules: - try: - lines, linen = inspect.getsourcelines(module) - except IOError: - continue - - counthash = {} - for linen, line in enumerate(lines): - linen += 1 - m = fre.match(line) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - filename = inspect.getsourcefile(module) - self.log.warning( - "%s:%d: Function %s redefined. 
Previously defined on line %d", - filename, - linen, - name, - prev, - ) - - # Get the start symbol - def get_start(self): - self.start = self.pdict.get("start") - - # Validate the start symbol - def validate_start(self): - if self.start is not None: - if not isinstance(self.start, string_types): - self.log.error("'start' must be a string") - - # Look for error handler - def get_error_func(self): - self.error_func = self.pdict.get("p_error") - - # Validate the error function - def validate_error_func(self): - if self.error_func: - if isinstance(self.error_func, types.FunctionType): - ismethod = 0 - elif isinstance(self.error_func, types.MethodType): - ismethod = 1 - else: - self.log.error("'p_error' defined, but is not a function or method") - self.error = True - return - - eline = self.error_func.__code__.co_firstlineno - efile = self.error_func.__code__.co_filename - module = inspect.getmodule(self.error_func) - self.modules.add(module) - - argcount = self.error_func.__code__.co_argcount - ismethod - if argcount != 1: - self.log.error("%s:%d: p_error() requires 1 argument", efile, eline) - self.error = True - - # Get the tokens map - def get_tokens(self): - tokens = self.pdict.get("tokens") - if not tokens: - self.log.error("No token list is defined") - self.error = True - return - - if not isinstance(tokens, (list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = True - return - - if not tokens: - self.log.error("tokens is empty") - self.error = True - return - - self.tokens = tokens - - # Validate the tokens - def validate_tokens(self): - # Validate the tokens. - if "error" in self.tokens: - self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = True - return - - terminals = set() - for n in self.tokens: - if n in terminals: - self.log.warning("Token %r multiply defined", n) - terminals.add(n) - - # Get the precedence map (if any) - def get_precedence(self): - self.prec = self.pdict.get("precedence") - - # Validate and parse the precedence map - def validate_precedence(self): - preclist = [] - if self.prec: - if not isinstance(self.prec, (list, tuple)): - self.log.error("precedence must be a list or tuple") - self.error = True - return - for level, p in enumerate(self.prec): - if not isinstance(p, (list, tuple)): - self.log.error("Bad precedence table") - self.error = True - return - - if len(p) < 2: - self.log.error( - "Malformed precedence entry %s. 
Must be (assoc, term, ..., term)", - p, - ) - self.error = True - return - assoc = p[0] - if not isinstance(assoc, string_types): - self.log.error("precedence associativity must be a string") - self.error = True - return - for term in p[1:]: - if not isinstance(term, string_types): - self.log.error("precedence items must be strings") - self.error = True - return - preclist.append((term, assoc, level + 1)) - self.preclist = preclist - - # Get all p_functions from the grammar - def get_pfunctions(self): - p_functions = [] - for name, item in self.pdict.items(): - if not name.startswith("p_") or name == "p_error": - continue - if isinstance(item, (types.FunctionType, types.MethodType)): - line = getattr(item, "co_firstlineno", item.__code__.co_firstlineno) - module = inspect.getmodule(item) - p_functions.append((line, module, name, item.__doc__)) - - # Sort all of the actions by line number; make sure to stringify - # modules to make them sortable, since `line` may not uniquely sort all - # p functions - p_functions.sort( - key=lambda p_function: ( - p_function[0], - str(p_function[1]), - p_function[2], - p_function[3], - ) - ) - self.pfuncs = p_functions - - # Validate all of the p_functions - def validate_pfunctions(self): - grammar = [] - # Check for non-empty symbols - if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = True - return - - for line, module, name, doc in self.pfuncs: - file = inspect.getsourcefile(module) - func = self.pdict[name] - if isinstance(func, types.MethodType): - reqargs = 2 - else: - reqargs = 1 - if func.__code__.co_argcount > reqargs: - self.log.error( - "%s:%d: Rule %r has too many arguments", file, line, func.__name__ - ) - self.error = True - elif func.__code__.co_argcount < reqargs: - self.log.error( - "%s:%d: Rule %r requires an argument", file, line, func.__name__ - ) - self.error = True - elif not func.__doc__: - self.log.warning( - "%s:%d: No documentation string specified in function %r (ignored)", - file, - line, - func.__name__, - ) - else: - try: - parsed_g = parse_grammar(doc, file, line) - for g in parsed_g: - grammar.append((name, g)) - except SyntaxError as e: - self.log.error(str(e)) - self.error = True - - # Looks like a valid grammar rule - # Mark the file in which defined. - self.modules.add(module) - - # Secondary validation step that looks for p_ definitions that are not functions - # or functions that look like they might be grammar rules. 
- - for n, v in self.pdict.items(): - if n.startswith("p_") and isinstance( - v, (types.FunctionType, types.MethodType) - ): - continue - if n.startswith("t_"): - continue - if n.startswith("p_") and n != "p_error": - self.log.warning("%r not defined as a function", n) - if (isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or ( - isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2 - ): - if v.__doc__: - try: - doc = v.__doc__.split(" ") - if doc[1] == ":": - self.log.warning( - "%s:%d: Possible grammar rule %r defined without p_ prefix", - v.__code__.co_filename, - v.__code__.co_firstlineno, - n, - ) - except IndexError: - pass - - self.grammar = grammar - - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build a parser -# ----------------------------------------------------------------------------- - - -def yacc( - method="LALR", - debug=yaccdebug, - module=None, - tabmodule=tab_module, - start=None, - check_recursion=True, - optimize=False, - write_tables=True, - debugfile=debug_file, - outputdir=None, - debuglog=None, - errorlog=None, - picklefile=None, -): - - if tabmodule is None: - tabmodule = tab_module - - # Reference to the parsing method of the last built parser - global parse - - # If pickling is enabled, table files are not created - if picklefile: - write_tables = 0 - - if errorlog is None: - errorlog = PlyLogger(sys.stderr) - - # Get the module dictionary used for the parser - if module: - _items = [(k, getattr(module, k)) for k in dir(module)] - pdict = dict(_items) - # If no __file__ attribute is available, try to obtain it from the __module__ instead - if "__file__" not in pdict: - pdict["__file__"] = sys.modules[pdict["__module__"]].__file__ - else: - pdict = get_caller_module_dict(2) - - if outputdir is None: - # If no output directory is set, the location of the output files - # is determined according to the following rules: - # - If tabmodule specifies a package, files go into that package directory - # - Otherwise, files go in the same directory as the specifying module - if isinstance(tabmodule, types.ModuleType): - srcfile = tabmodule.__file__ - else: - if "." not in tabmodule: - srcfile = pdict["__file__"] - else: - parts = tabmodule.split(".") - pkgname = ".".join(parts[:-1]) - exec("import %s" % pkgname) - srcfile = getattr(sys.modules[pkgname], "__file__", "") - outputdir = os.path.dirname(srcfile) - - # Determine if the module is package of a package or not. - # If so, fix the tabmodule setting so that tables load correctly - pkg = pdict.get("__package__") - if pkg and isinstance(tabmodule, str): - if "." not in tabmodule: - tabmodule = pkg + "." 
+ tabmodule - - # Set start symbol if it's specified directly using an argument - if start is not None: - pdict["start"] = start - - # Collect parser information from the dictionary - pinfo = ParserReflect(pdict, log=errorlog) - pinfo.get_all() - - if pinfo.error: - raise YaccError("Unable to build parser") - - # Check signature against table files (if any) - signature = pinfo.signature() - - # Read the tables - try: - lr = LRTable() - if picklefile: - read_signature = lr.read_pickle(picklefile) - else: - read_signature = lr.read_table(tabmodule) - if optimize or (read_signature == signature): - try: - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) - parse = parser.parse - return parser - except Exception as e: - errorlog.warning("There was a problem loading the table file: %r", e) - except VersionError as e: - errorlog.warning(str(e)) - except ImportError: - pass - - if debuglog is None: - if debug: - try: - debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), "w")) - except IOError as e: - errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) - debuglog = NullLogger() - else: - debuglog = NullLogger() - - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) - - errors = False - - # Validate the parser information - if pinfo.validate_all(): - raise YaccError("Unable to build parser") - - if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") - - # Create a grammar object - grammar = Grammar(pinfo.tokens) - - # Set precedence level for terminals - for term, assoc, level in pinfo.preclist: - try: - grammar.set_precedence(term, assoc, level) - except GrammarError as e: - errorlog.warning("%s", e) - - # Add productions to the grammar - for funcname, gram in pinfo.grammar: - file, line, prodname, syms = gram - try: - grammar.add_production(prodname, syms, funcname, file, line) - except GrammarError as e: - errorlog.error("%s", e) - errors = True - - # Set the grammar start symbols - try: - if start is None: - grammar.set_start(pinfo.start) - else: - grammar.set_start(start) - except GrammarError as e: - errorlog.error(str(e)) - errors = True - - if errors: - raise YaccError("Unable to build parser") - - # Verify the grammar structure - undefined_symbols = grammar.undefined_symbols() - for sym, prod in undefined_symbols: - errorlog.error( - "%s:%d: Symbol %r used, but not defined as a token or a rule", - prod.file, - prod.line, - sym, - ) - errors = True - - unused_terminals = grammar.unused_terminals() - if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") - for term in unused_terminals: - errorlog.warning("Token %r defined, but not used", term) - debuglog.info(" %s", term) - - # Print out all productions to the debug log - if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n, p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) - - # Find unused non-terminals - unused_rules = grammar.unused_rules() - for prod in unused_rules: - errorlog.warning( - "%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name - ) - - if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") - if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) - - if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") - if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) - - if debug: - 
debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") - terms = list(grammar.Terminals) - terms.sort() - for term in terms: - debuglog.info( - "%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]) - ) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") - nonterms = list(grammar.Nonterminals) - nonterms.sort() - for nonterm in nonterms: - debuglog.info( - "%-20s : %s", - nonterm, - " ".join([str(s) for s in grammar.Nonterminals[nonterm]]), - ) - debuglog.info("") - - if check_recursion: - unreachable = grammar.find_unreachable() - for u in unreachable: - errorlog.warning("Symbol %r is unreachable", u) - - infinite = grammar.infinite_cycles() - for inf in infinite: - errorlog.error("Infinite recursion detected for symbol %r", inf) - errors = True - - unused_prec = grammar.unused_precedence() - for term, assoc in unused_prec: - errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term) - errors = True - - if errors: - raise YaccError("Unable to build parser") - - # Run the LRGeneratedTable on the grammar - if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar, method, debuglog) - - if debug: - num_sr = len(lr.sr_conflicts) - - # Report shift/reduce and reduce/reduce conflicts - if num_sr == 1: - errorlog.warning("1 shift/reduce conflict") - elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) - - num_rr = len(lr.rr_conflicts) - if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") - elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) - - # Write out conflicts to the output file - if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") - - for state, tok, resolution in lr.sr_conflicts: - debuglog.warning( - "shift/reduce conflict for %s in state %d resolved as %s", - tok, - state, - resolution, - ) - - already_reported = set() - for state, rule, rejected in lr.rr_conflicts: - if (state, id(rule), id(rejected)) in already_reported: - continue - debuglog.warning( - "reduce/reduce conflict in state %d resolved using rule (%s)", - state, - rule, - ) - debuglog.warning("rejected rule (%s) in state %d", rejected, state) - errorlog.warning( - "reduce/reduce conflict in state %d resolved using rule (%s)", - state, - rule, - ) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported.add((state, id(rule), id(rejected))) - - warned_never = [] - for state, rule, rejected in lr.rr_conflicts: - if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) - warned_never.append(rejected) - - # Write the table file if requested - if write_tables: - try: - lr.write_table(tabmodule, outputdir, signature) - except IOError as e: - errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) - - # Write a pickled version of the tables - if picklefile: - try: - lr.pickle_table(picklefile, signature) - except IOError as e: - errorlog.warning("Couldn't create %r. 
%s" % (picklefile, e)) - - # Build the parser - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr, pinfo.error_func) - - parse = parser.parse - return parser diff --git a/src/pycparser/ply/yacc.pyc b/src/pycparser/ply/yacc.pyc deleted file mode 100644 index 6e743ab2..00000000 Binary files a/src/pycparser/ply/yacc.pyc and /dev/null differ diff --git a/src/pycparser/ply/ygen.py b/src/pycparser/ply/ygen.py deleted file mode 100644 index f84251f5..00000000 --- a/src/pycparser/ply/ygen.py +++ /dev/null @@ -1,75 +0,0 @@ -# ply: ygen.py -# -# This is a support program that auto-generates different versions of the YACC parsing -# function with different features removed for the purposes of performance. -# -# Users should edit the method LParser.parsedebug() in yacc.py. The source code -# for that method is then used to create the other methods. See the comments in -# yacc.py for further details. - -import os.path -import shutil - - -def get_source_range(lines, tag): - srclines = enumerate(lines) - start_tag = "#--! %s-start" % tag - end_tag = "#--! %s-end" % tag - - for start_index, line in srclines: - if line.strip().startswith(start_tag): - break - - for end_index, line in srclines: - if line.strip().endswith(end_tag): - break - - return (start_index + 1, end_index) - - -def filter_section(lines, tag): - filtered_lines = [] - include = True - tag_text = "#--! %s" % tag - for line in lines: - if line.strip().startswith(tag_text): - include = not include - elif include: - filtered_lines.append(line) - return filtered_lines - - -def main(): - dirname = os.path.dirname(__file__) - shutil.copy2(os.path.join(dirname, "yacc.py"), os.path.join(dirname, "yacc.py.bak")) - with open(os.path.join(dirname, "yacc.py"), "r") as f: - lines = f.readlines() - - parse_start, parse_end = get_source_range(lines, "parsedebug") - parseopt_start, parseopt_end = get_source_range(lines, "parseopt") - parseopt_notrack_start, parseopt_notrack_end = get_source_range( - lines, "parseopt-notrack" - ) - - # Get the original source - orig_lines = lines[parse_start:parse_end] - - # Filter the DEBUG sections out - parseopt_lines = filter_section(orig_lines, "DEBUG") - - # Filter the TRACKING sections out - parseopt_notrack_lines = filter_section(parseopt_lines, "TRACKING") - - # Replace the parser source sections with updated versions - lines[parseopt_notrack_start:parseopt_notrack_end] = parseopt_notrack_lines - lines[parseopt_start:parseopt_end] = parseopt_lines - - lines = [line.rstrip() + "\n" for line in lines] - with open(os.path.join(dirname, "yacc.py"), "w") as f: - f.writelines(lines) - - print("Updated yacc.py") - - -if __name__ == "__main__": - main() diff --git a/src/pycparser/plyparser.py b/src/pycparser/plyparser.py deleted file mode 100755 index afeb7b29..00000000 --- a/src/pycparser/plyparser.py +++ /dev/null @@ -1,138 +0,0 @@ -# ----------------------------------------------------------------- -# plyparser.py -# -# PLYParser class and other utilites for simplifying programming -# parsers with PLY -# -# Eli Bendersky [https://eli.thegreenplace.net/] -# License: BSD -# ----------------------------------------------------------------- - -import warnings - - -class Coord(object): - """Coordinates of a syntactic element. 
Consists of: - - File name - - Line number - - (optional) column number, for the Lexer - """ - - __slots__ = ("file", "line", "column", "__weakref__") - - def __init__(self, file, line, column=None): - self.file = file - self.line = line - self.column = column - - def __str__(self): - str = "%s:%s" % (self.file, self.line) - if self.column: - str += ":%s" % self.column - return str - - -class ParseError(Exception): - pass - - -class PLYParser(object): - def _create_opt_rule(self, rulename): - """Given a rule name, creates an optional ply.yacc rule - for it. The name of the optional rule is - _opt - """ - optname = rulename + "_opt" - - def optrule(self, p): - p[0] = p[1] - - optrule.__doc__ = "%s : empty\n| %s" % (optname, rulename) - optrule.__name__ = "p_%s" % optname - setattr(self.__class__, optrule.__name__, optrule) - - def _coord(self, lineno, column=None): - return Coord(file=self.clex.filename, line=lineno, column=column) - - def _token_coord(self, p, token_idx): - """Returns the coordinates for the YaccProduction objet 'p' indexed - with 'token_idx'. The coordinate includes the 'lineno' and - 'column'. Both follow the lex semantic, starting from 1. - """ - last_cr = p.lexer.lexer.lexdata.rfind("\n", 0, p.lexpos(token_idx)) - if last_cr < 0: - last_cr = -1 - column = p.lexpos(token_idx) - (last_cr) - return self._coord(p.lineno(token_idx), column) - - def _parse_error(self, msg, coord): - raise ParseError("%s: %s" % (coord, msg)) - - -def parameterized(*params): - """Decorator to create parameterized rules. - - Parameterized rule methods must be named starting with 'p_' and contain - 'xxx', and their docstrings may contain 'xxx' and 'yyy'. These will be - replaced by the given parameter tuples. For example, ``p_xxx_rule()`` with - docstring 'xxx_rule : yyy' when decorated with - ``@parameterized(('id', 'ID'))`` produces ``p_id_rule()`` with the docstring - 'id_rule : ID'. Using multiple tuples produces multiple rules. - """ - - def decorate(rule_func): - rule_func._params = params - return rule_func - - return decorate - - -def template(cls): - """Class decorator to generate rules from parameterized rule templates. - - See `parameterized` for more information on parameterized rules. - """ - issued_nodoc_warning = False - for attr_name in dir(cls): - if attr_name.startswith("p_"): - method = getattr(cls, attr_name) - if hasattr(method, "_params"): - # Remove the template method - delattr(cls, attr_name) - # Create parameterized rules from this method; only run this if - # the method has a docstring. This is to address an issue when - # pycparser's users are installed in -OO mode which strips - # docstrings away. - # See: https://github.com/eliben/pycparser/pull/198/ and - # https://github.com/eliben/pycparser/issues/197 - # for discussion. - if method.__doc__ is not None: - _create_param_rules(cls, method) - elif not issued_nodoc_warning: - warnings.warn( - "parsing methods must have __doc__ for pycparser to work properly", - RuntimeWarning, - stacklevel=2, - ) - issued_nodoc_warning = True - return cls - - -def _create_param_rules(cls, func): - """Create ply.yacc rules based on a parameterized rule function - - Generates new methods (one per each pair of parameters) based on the - template rule function `func`, and attaches them to `cls`. The rule - function's parameters must be accessible via its `_params` attribute. 
- """ - for xxx, yyy in func._params: - # Use the template method's body for each new method - def param_rule(self, p): - func(self, p) - - # Substitute in the params for the grammar rule and function name - param_rule.__doc__ = func.__doc__.replace("xxx", xxx).replace("yyy", yyy) - param_rule.__name__ = func.__name__.replace("xxx", xxx) - - # Attach the new method to the class - setattr(cls, param_rule.__name__, param_rule) diff --git a/src/pycparser/plyparser.pyc b/src/pycparser/plyparser.pyc deleted file mode 100644 index 7b5ad8ef..00000000 Binary files a/src/pycparser/plyparser.pyc and /dev/null differ diff --git a/submodule/pycparser b/submodule/pycparser new file mode 160000 index 00000000..caa4c11e --- /dev/null +++ b/submodule/pycparser @@ -0,0 +1 @@ +Subproject commit caa4c11ebb99ed5cf854dc6342b5352d5ff52686