Skip to content

Commit 5804a43

Browse files
committed
Replace hand-made C parser with the lark parser-generator implementation
1 parent f8150dd commit 5804a43

File tree

10 files changed

+613
-872
lines changed

10 files changed

+613
-872
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
*.pyc
22
__pycache__/
3+
env/
4+
5+
# License incompatible
6+
compiler/parser_gen.py
37

README_C.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@ The compiler is invoked by calling `compiler_main.py`.
1414

1515
Command line parameters:
1616
```
17-
usage: compiler_main.py [-h] [-E] [-S] [--world-dir WORLD_DIR] [--as_zip]
17+
usage: compiler_main.py [-h] [-E] [-S] [--world-dir WORLD_DIR] [--as-zip]
1818
[--namespace NAMESPACE] [--rem-existing] [--debug]
1919
[--stack STACK] [--arg ARG]
2020
[--place-location PLACE_LOCATION] [--enable-sync]
2121
[--page-size PAGE_SIZE] [--setup-on-load]
2222
[--spawn-location SPAWN_LOCATION]
2323
[--pack-description PACK_DESCRIPTION]
24+
[--extern EXTERN]
2425
file
2526
2627
positional arguments:
@@ -32,7 +33,7 @@ optional arguments:
3233
-S Don't run assembler. Outputs ASM to stdout
3334
--world-dir WORLD_DIR
3435
World Directory
35-
--as_zip Write datapack as zip file
36+
--as-zip Write datapack as zip file
3637
--namespace NAMESPACE
3738
Function namespace
3839
--rem-existing Remove existing functions in namespace
@@ -49,8 +50,13 @@ optional arguments:
4950
Location to spawn hidden armor stand
5051
--pack-description PACK_DESCRIPTION
5152
Datapack description
53+
--extern EXTERN Specify external symbol
5254
```
5355

56+
You will need to generate the standalone parser (from [Lark](https://github.com/lark-parser/lark)) using the `./compiler/rebuild-grammar.sh` script.
57+
The Lark Python package must be installed; `pip` can install it from the `requirements.txt` file. Using `virtualenv` is recommended.
58+
Example: `virtualenv env --python=python3 && source env/bin/activate && pip install -r requirements.txt`
59+
5460
There are some examples in the [examples](https://github.com/simon816/Command-Block-Assembly/tree/master/examples) directory.
5561

5662
The [mclib.h](https://github.com/simon816/Command-Block-Assembly/blob/master/compiler/include/mclib.h) file

compiler/compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ def compile(self, program):
2222
visitor.visit_program(program)
2323
return self.writer.get_output()
2424

25+
def get_type_names(self):
26+
# Bit hacky: instantiate a throwaway CompilerVisitor (passing None) only to read the keys of its types table
27+
tmp = CompilerVisitor(None)
28+
return tmp.types.types.keys()
29+
2530
def load_libs(self, visitor):
2631
from .lib import libs
2732
for lib_name, exports in libs.items():

compiler/grammar.lark

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// Lark: https://github.com/lark-parser/lark
2+
3+
// Mostly based on syntax specified in:
4+
// https://github.com/antlr/grammars-v4/blob/master/c/C.g4
5+
6+
// Special hook for the preprocessor so we can use constant_expression
7+
preprocessor_hook: "%{$PREPROCESSOR" constant_expression "}%"
8+
9+
program: (external_declaration* | preprocessor_hook)
10+
11+
pragma: "_Pragma" "(" STRING_LITERAL ")"
12+
13+
?external_declaration: function_definition | declaration | ";" | pragma
14+
15+
function_definition: decl_specifier declarator block_statement
16+
17+
declaration: decl_specifier init_declarator_list ";"
18+
| decl_specifier ";"
19+
20+
init_declarator_list: init_declarator ("," init_declarator)*
21+
22+
init_declarator: declarator
23+
| declarator "=" initializer
24+
25+
initializer: assignment_expression
26+
| "{" initializer_list ","? "}" -> compound_initializer
27+
28+
initializer_list: initializer_item ("," initializer_item)*
29+
initializer_item: designation? initializer
30+
?designation: designator_list "="
31+
?designator_list: designator designator_list | designator
32+
designator: "[" constant_expression "]" -> index_member_reference
33+
| "." identifier_expr -> name_member_reference
34+
35+
declarator: STAR* direct_declarator
36+
37+
direct_declarator: identifier_expr
38+
| identifier_expr "[" assignment_expression? "]" -> array_declarator
39+
| identifier_expr "(" param_list? ")" -> func_declarator
40+
41+
param_list: param_declaration ("," param_declaration)* ("," VARARGS)?
42+
43+
param_declaration: decl_specifier declarator
44+
| decl_specifier STAR* -> unnamed_param_declaration
45+
46+
// Use epsilon in child rules rather than make optional so we don't
47+
// lose it from the tree
48+
decl_specifier: storage_class_specifier type_qualifier type_specifier
49+
storage_class_specifier: TYPEDEF | STATIC |
50+
type_qualifier: CONST |
51+
52+
?type_specifier: struct_specifier | typedef_name
53+
54+
typedef_name: TYPEDEF_NAME
55+
TYPEDEF_NAME.2: IDENT
56+
57+
?struct_specifier: struct_spec_reference | struct_spec_declaration
58+
struct_spec_reference: "struct" identifier_expr
59+
struct_spec_declaration: "struct" identifier_expr? "{" struct_declaration_list "}"
60+
61+
struct_declaration_list: struct_declaration+
62+
struct_declaration: type_specifier struct_declarator_list? ";"
63+
64+
struct_declarator_list: struct_declarator ("," struct_declarator)*
65+
66+
?struct_declarator: declarator
67+
68+
type_name: type_specifier STAR*
69+
70+
// So the transformer can distinguish between block-level declarations
71+
// and global
72+
block_declaration: declaration
73+
block_statement: "{" (statement | block_declaration)* "}"
74+
75+
?statement: block_statement | statement_no_block
76+
77+
?statement_no_block: labelled_statement
78+
| expression? ";" -> expression_statement
79+
| "if" "(" expression ")" statement ["else" statement] -> if_statement
80+
| "switch" "(" expression ")" "{" switch_case_fragment* "}" -> switch_statement
81+
| "while" "(" expression ")" statement -> while_statement
82+
| "do" statement "while" "(" expression ")" ";" -> do_while_statement
83+
| "for" "(" expression? ";" expression? ";" expression? ")" statement -> for_statement
84+
| "goto" identifier_expr ";" -> goto_statement
85+
| "continue" ";" -> continue_statement
86+
| "break" ";" -> break_statement
87+
| "return" expression? ";" -> return_statement
88+
| "sync" ";" -> sync_statement
89+
| pragma
90+
91+
switch_case_fragment: "case" constant_expression ":" switch_case_body?
92+
| "default" ":" switch_case_body? -> switch_default_fragment
93+
?switch_case_body: block_statement | statement_no_block+
94+
95+
?labelled_statement: identifier_expr ":" statement -> label_statement
96+
// | "case" constant_expression ":" statement -> case_statement
97+
// | "default" ":" statement -> default_statement
98+
99+
?expression: assignment_expression
100+
| expression "," assignment_expression
101+
102+
?constant_expression: conditional_expression
103+
104+
?assignment_expression: conditional_expression
105+
| unary_expression (ASSIGN | ASSIGN_OP) assignment_expression
106+
107+
?conditional_expression: logical_or_expression ["?" expression ":" conditional_expression]
108+
109+
?logical_or_expression: logical_and_expression
110+
| logical_or_expression LOG_OR_OP logical_and_expression -> binop_expr
111+
112+
?logical_and_expression: inclusive_or_expression
113+
| logical_and_expression LOG_AND_OP inclusive_or_expression -> binop_expr
114+
115+
?inclusive_or_expression: exclusive_or_expression
116+
| inclusive_or_expression OR_OP exclusive_or_expression -> binop_expr
117+
118+
?exclusive_or_expression: and_expression
119+
| exclusive_or_expression XOR_OP and_expression -> binop_expr
120+
121+
?and_expression: equality_expression
122+
| and_expression AND_OP equality_expression -> binop_expr
123+
124+
?equality_expression: relational_expression
125+
| equality_expression EQ relational_expression -> binop_expr
126+
| equality_expression NEQ relational_expression -> binop_expr
127+
128+
?relational_expression: shift_expression
129+
| relational_expression REL_OP shift_expression -> binop_expr
130+
131+
?shift_expression: additive_expression
132+
| shift_expression SHIFT_OP additive_expression -> binop_expr
133+
134+
?additive_expression: multiplicative_expression
135+
| additive_expression ADD_OP multiplicative_expression -> binop_expr
136+
137+
?multiplicative_expression: cast_expression
138+
| multiplicative_expression (STAR | MUL_OP) cast_expression -> binop_expr
139+
140+
?cast_expression: "(" type_name ")" cast_expression
141+
| unary_expression
142+
143+
?unary_expression: postfix_expression
144+
| INCREMENT_OP unary_expression -> pre_increment_expr
145+
| UNARY_OP cast_expression
146+
| "sizeof" ( "(" type_name ")" | unary_expression ) -> sizeof_expr
147+
148+
?postfix_expression: primary_expression
149+
| postfix_expression "[" expression "]" -> array_subscript_expr
150+
| postfix_expression "(" (assignment_expression ("," assignment_expression)*)? ")" -> function_call_expr
151+
| postfix_expression (DOT | ARROW) identifier_expr -> member_access_expr
152+
| postfix_expression INCREMENT_OP -> post_increment_expr
153+
154+
?primary_expression: identifier_expr
155+
| INT_CONSTANT -> int_literal
156+
| STRING_LITERAL+ -> string_literal
157+
| "(" expression ")"
158+
159+
160+
identifier_expr: IDENT
161+
162+
VARARGS: "..."
163+
164+
TYPEDEF: "typedef"
165+
STATIC: "static"
166+
CONST: "const"
167+
168+
STAR.0: "*"
169+
EQ.2: "=="
170+
ASSIGN: "="
171+
ASSIGN_OP: ASSIGN | "*=" | "/=" | "%=" | "+=" | "-=" | "<<=" | ">>=" | "&=" | "^=" | "|="
172+
173+
LOG_OR_OP: "||"
174+
LOG_AND_OP: "&&"
175+
OR_OP: "|"
176+
XOR_OP: "^"
177+
AND_OP: "&"
178+
NEQ: "!="
179+
REL_OP: "<=" | ">=" | "<" | ">"
180+
SHIFT_OP: "<<" | ">>"
181+
ADD_OP: "+" | "-"
182+
MUL_OP: "*" | "/" | "%"
183+
UNARY_OP: "&" | "*" | "+" | "-" | "~" | "!"
184+
INCREMENT_OP: "++" | "--"
185+
DOT: "."
186+
ARROW: "->"
187+
188+
INT_CONSTANT: DEC_CONSTANT | OCT_CONSTANT | HEX_CONSTANT | BIN_CONSTANT | CHAR_CONSTANT | "0"
189+
DEC_CONSTANT: NON_ZERO_DIGIT DIGIT*
190+
OCT_CONSTANT: "0" ("0".."7")+
191+
HEX_CONSTANT: "0" ("x" | "X") ("0".."9" | "A".."F" | "a".."f")+
192+
BIN_CONSTANT: "0" ("b" | "B") ("0" | "1")+
193+
194+
// One character inside a character constant: either any single character that
// is not a backslash (negative lookahead), or a backslash followed by a simple,
// hexadecimal or octal escape sequence.
SINGLE_CHAR: (/(?!\\)./ | "\\" /([abefnrtv\\'"?]|x[\da-fA-F]+|[0-7]{1,3})/ )
195+
196+
CHAR_CONSTANT: "'" SINGLE_CHAR "'"
197+
198+
%import common.ESCAPED_STRING
199+
STRING_LITERAL: ESCAPED_STRING
200+
201+
DIGIT: "0" .. "9"
202+
203+
NON_ZERO_DIGIT: "1" .. "9"
204+
205+
LETTER: "a".."z" | "A".."Z"
206+
207+
ID_START: LETTER | "_"
208+
209+
IDENT: ID_START (ID_START | DIGIT)*
210+
211+
WHITESPACE: " " | "\t" | "\f" | "\n"
212+
%ignore WHITESPACE+
213+
214+
COMMENT: "//" /[^\n]/* | "/*" /(\S|\s)*?/ "*/"
215+
%ignore COMMENT

0 commit comments

Comments
 (0)