|
| 1 | +// Lark: https://github.com/lark-parser/lark |
| 2 | + |
| 3 | +// Mostly based on syntax specified in: |
| 4 | +// https://github.com/antlr/grammars-v4/blob/master/c/C.g4 |
| 5 | + |
| 6 | +// Special hook for the preprocessor so we can use constant_expression |
| 7 | +preprocessor_hook: "%{$PREPROCESSOR" constant_expression "}%" |
| 8 | + |
| 9 | +program: (external_declaration* | preprocessor_hook) |
| 10 | + |
| 11 | +pragma: "_Pragma" "(" STRING_LITERAL ")" |
| 12 | + |
| 13 | +?external_declaration: function_definition | declaration | ";" | pragma |
| 14 | + |
| 15 | +function_definition: decl_specifier declarator block_statement |
| 16 | + |
| 17 | +declaration: decl_specifier init_declarator_list ";" |
| 18 | + | decl_specifier ";" |
| 19 | + |
| 20 | +init_declarator_list: init_declarator ("," init_declarator)* |
| 21 | + |
| 22 | +init_declarator: declarator |
| 23 | + | declarator "=" initializer |
| 24 | + |
| 25 | +initializer: assignment_expression |
| 26 | + | "{" initializer_list ","? "}" -> compound_initializer |
| 27 | + |
| 28 | +initializer_list: initializer_item ("," initializer_item)* |
| 29 | +initializer_item: designation? initializer |
| 30 | +?designation: designator_list "=" |
| 31 | +?designator_list: designator designator_list | designator |
| 32 | +designator: "[" constant_expression "]" -> index_member_reference |
| 33 | + | "." identifier_expr -> name_member_reference |
| 34 | + |
| 35 | +declarator: STAR* direct_declarator |
| 36 | + |
| 37 | +direct_declarator: identifier_expr |
| 38 | + | identifier_expr "[" assignment_expression? "]" -> array_declarator |
| 39 | + | identifier_expr "(" param_list? ")" -> func_declarator |
| 40 | + |
| 41 | +param_list: param_declaration ("," param_declaration)* ("," VARARGS)? |
| 42 | + |
| 43 | +param_declaration: decl_specifier declarator |
| 44 | + | decl_specifier STAR* -> unnamed_param_declaration |
| 45 | + |
| 46 | +// Use an epsilon (empty) alternative in the child rules rather than making |
| 47 | +// them optional, so we don't lose the node from the tree |
| 48 | +decl_specifier: storage_class_specifier type_qualifier type_specifier |
| 49 | +storage_class_specifier: TYPEDEF | STATIC | |
| 50 | +type_qualifier: CONST | |
| 51 | + |
| 52 | +?type_specifier: struct_specifier | typedef_name |
| 53 | + |
| 54 | +typedef_name: TYPEDEF_NAME |
| 55 | +TYPEDEF_NAME.2: IDENT |
| 56 | + |
| 57 | +?struct_specifier: struct_spec_reference | struct_spec_declaration |
| 58 | +struct_spec_reference: "struct" identifier_expr |
| 59 | +struct_spec_declaration: "struct" identifier_expr? "{" struct_declaration_list "}" |
| 60 | + |
| 61 | +struct_declaration_list: struct_declaration+ |
| 62 | +struct_declaration: type_specifier struct_declarator_list? ";" |
| 63 | + |
| 64 | +struct_declarator_list: struct_declarator ("," struct_declarator)* |
| 65 | + |
| 66 | +?struct_declarator: declarator |
| 67 | + |
| 68 | +type_name: type_specifier STAR* |
| 69 | + |
| 70 | +// Wrapper rule so the transformer can distinguish block-level declarations |
| 71 | +// from global ones |
| 72 | +block_declaration: declaration |
| 73 | +block_statement: "{" (statement | block_declaration)* "}" |
| 74 | + |
| 75 | +?statement: block_statement | statement_no_block |
| 76 | + |
| 77 | +?statement_no_block: labelled_statement |
| 78 | + | expression? ";" -> expression_statement |
| 79 | + | "if" "(" expression ")" statement ["else" statement] -> if_statement |
| 80 | + | "switch" "(" expression ")" "{" switch_case_fragment* "}" -> switch_statement |
| 81 | + | "while" "(" expression ")" statement -> while_statement |
| 82 | + | "do" statement "while" "(" expression ")" ";" -> do_while_statement |
| 83 | + | "for" "(" expression? ";" expression? ";" expression? ")" statement -> for_statement |
| 84 | + | "goto" identifier_expr ";" -> goto_statement |
| 85 | + | "continue" ";" -> continue_statement |
| 86 | + | "break" ";" -> break_statement |
| 87 | + | "return" expression? ";" -> return_statement |
| 88 | + | "sync" ";" -> sync_statement |
| 89 | + | pragma |
| 90 | + |
| 91 | +switch_case_fragment: "case" constant_expression ":" switch_case_body? |
| 92 | + | "default" ":" switch_case_body? -> switch_default_fragment |
| 93 | +?switch_case_body: block_statement | statement_no_block+ |
| 94 | + |
| 95 | +?labelled_statement: identifier_expr ":" statement -> label_statement |
| 96 | +// | "case" constant_expression ":" statement -> case_statement |
| 97 | +// | "default" ":" statement -> default_statement |
| 98 | + |
| 99 | +?expression: assignment_expression |
| 100 | + | expression "," assignment_expression |
| 101 | + |
| 102 | +?constant_expression: conditional_expression |
| 103 | + |
| 104 | +?assignment_expression: conditional_expression |
| 105 | + | unary_expression (ASSIGN | ASSIGN_OP) assignment_expression |
| 106 | + |
| 107 | +?conditional_expression: logical_or_expression ["?" expression ":" conditional_expression] |
| 108 | + |
| 109 | +?logical_or_expression: logical_and_expression |
| 110 | + | logical_or_expression LOG_OR_OP logical_and_expression -> binop_expr |
| 111 | + |
| 112 | +?logical_and_expression: inclusive_or_expression |
| 113 | + | logical_and_expression LOG_AND_OP inclusive_or_expression -> binop_expr |
| 114 | + |
| 115 | +?inclusive_or_expression: exclusive_or_expression |
| 116 | + | inclusive_or_expression OR_OP exclusive_or_expression -> binop_expr |
| 117 | + |
| 118 | +?exclusive_or_expression: and_expression |
| 119 | + | exclusive_or_expression XOR_OP and_expression -> binop_expr |
| 120 | + |
| 121 | +?and_expression: equality_expression |
| 122 | + | and_expression AND_OP equality_expression -> binop_expr |
| 123 | + |
| 124 | +?equality_expression: relational_expression |
| 125 | + | equality_expression EQ relational_expression -> binop_expr |
| 126 | + | equality_expression NEQ relational_expression -> binop_expr |
| 127 | + |
| 128 | +?relational_expression: shift_expression |
| 129 | + | relational_expression REL_OP shift_expression -> binop_expr |
| 130 | + |
| 131 | +?shift_expression: additive_expression |
| 132 | + | shift_expression SHIFT_OP additive_expression -> binop_expr |
| 133 | + |
| 134 | +?additive_expression: multiplicative_expression |
| 135 | + | additive_expression ADD_OP multiplicative_expression -> binop_expr |
| 136 | + |
| 137 | +?multiplicative_expression: cast_expression |
| 138 | + | multiplicative_expression (STAR | MUL_OP) cast_expression -> binop_expr |
| 139 | + |
| 140 | +?cast_expression: "(" type_name ")" cast_expression |
| 141 | + | unary_expression |
| 142 | + |
| 143 | +?unary_expression: postfix_expression |
| 144 | + | INCREMENT_OP unary_expression -> pre_increment_expr |
| 145 | + | UNARY_OP cast_expression |
| 146 | + | "sizeof" ( "(" type_name ")" | unary_expression ) -> sizeof_expr |
| 147 | + |
| 148 | +?postfix_expression: primary_expression |
| 149 | + | postfix_expression "[" expression "]" -> array_subscript_expr |
| 150 | + | postfix_expression "(" (assignment_expression ("," assignment_expression)*)? ")" -> function_call_expr |
| 151 | + | postfix_expression (DOT | ARROW) identifier_expr -> member_access_expr |
| 152 | + | postfix_expression INCREMENT_OP -> post_increment_expr |
| 153 | + |
| 154 | +?primary_expression: identifier_expr |
| 155 | + | INT_CONSTANT -> int_literal |
| 156 | + | STRING_LITERAL+ -> string_literal |
| 157 | + | "(" expression ")" |
| 158 | + |
| 159 | + |
| 160 | +identifier_expr: IDENT |
| 161 | + |
| 162 | +VARARGS: "..." |
| 163 | + |
| 164 | +TYPEDEF: "typedef" |
| 165 | +STATIC: "static" |
| 166 | +CONST: "const" |
| 167 | + |
| 168 | +STAR.0: "*" |
| 169 | +EQ.2: "==" |
| 170 | +ASSIGN: "=" |
| 171 | +ASSIGN_OP: ASSIGN | "*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|=" |
| 172 | + |
| 173 | +LOG_OR_OP: "||" |
| 174 | +LOG_AND_OP: "&&" |
| 175 | +OR_OP: "|" |
| 176 | +XOR_OP: "^" |
| 177 | +AND_OP: "&" |
| 178 | +NEQ: "!=" |
| 179 | +REL_OP: "<=" | ">=" | "<" | ">" |
| 180 | +SHIFT_OP: "<<" | ">>" |
| 181 | +ADD_OP: "+" | "-" |
| 182 | +MUL_OP: "*" | "/" | "%" |
| 183 | +UNARY_OP: "&" | "*" | "+" | "-" | "~" | "!" |
| 184 | +INCREMENT_OP: "++" | "--" |
| 185 | +DOT: "." |
| 186 | +ARROW: "->" |
| 187 | + |
| 188 | +INT_CONSTANT: DEC_CONSTANT | OCT_CONSTANT | HEX_CONSTANT | BIN_CONSTANT | CHAR_CONSTANT | "0" |
| 189 | +DEC_CONSTANT: NON_ZERO_DIGIT DIGIT* |
| 190 | +OCT_CONSTANT: "0" ("0".."7")+ |
| 191 | +HEX_CONSTANT: "0" ("x" | "X") ("0".."9" | "A".."F" | "a".."f")+ |
| 192 | +BIN_CONSTANT: "0" ("b" | "B") ("0" | "1")+ |
| 193 | +// One character of a char constant: any char except backslash, single quote or newline, or a backslash escape (simple, hex or octal) |
| 194 | +SINGLE_CHAR: (/[^\\'\r\n]/ | "\\" /([abefnrtv\\'"?]|x[\da-fA-F]+|[0-7]{1,3})/ ) |
| 195 | + |
| 196 | +CHAR_CONSTANT: "'" SINGLE_CHAR "'" |
| 197 | + |
| 198 | +%import common.ESCAPED_STRING |
| 199 | +STRING_LITERAL: ESCAPED_STRING |
| 200 | + |
| 201 | +DIGIT: "0" .. "9" |
| 202 | + |
| 203 | +NON_ZERO_DIGIT: "1" .. "9" |
| 204 | + |
| 205 | +LETTER: "a".."z" | "A".."Z" |
| 206 | + |
| 207 | +ID_START: LETTER | "_" |
| 208 | + |
| 209 | +IDENT: ID_START (ID_START | DIGIT)* |
| 210 | +// Ignorable whitespace; includes "\r" so sources with CRLF line endings lex cleanly |
| 211 | +WHITESPACE: " " | "\t" | "\f" | "\r" | "\n" |
| 212 | +%ignore WHITESPACE+ |
| 213 | + |
| 214 | +COMMENT: "//" /[^\n]/* | "/*" /(\S|\s)*?/ "*/" |
| 215 | +%ignore COMMENT |
0 commit comments