Skip to content

Commit aebd6c3

Browse files
Merge pull request #44 from MarcellPerger1/start-ast
Add AST generation
2 parents 697d10a + 2e8c982 commit aebd6c3

File tree

65 files changed

+1877
-151
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1877
-151
lines changed

.coveragerc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[report]
2+
exclude_also =
3+
if __name__ == ['"]__main__['"]:
4+
if TYPE_CHECKING:
5+
assert 0\b

.github/workflows/fuzzer.yml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,34 @@ on:
55
description: Number of iterations
66
type: number
77
default: 250000
8+
do_shard:
9+
description: Run it on 4 separate machines (`n` on each)?
10+
type: boolean
11+
default: false
812

913

1014
jobs:
11-
test:
15+
fuzzer:
1216
runs-on: ubuntu-latest
1317
strategy:
1418
matrix:
1519
py_version:
1620
- "3.10"
1721
- "3.11"
22+
- "3.12"
23+
do_shard:
24+
- ${{ inputs.do_shard }}
25+
shard_index: [0, 1, 2, 3]
26+
exclude:
27+
- do_shard: false
28+
include:
29+
- do_shard: false
30+
py_version: "3.10"
31+
- do_shard: false
32+
py_version: "3.11"
33+
- do_shard: false
34+
py_version: "3.12"
35+
name: ${{ inputs.do_shard && format('Run fuzzer (Python {0}, shard {1})', matrix.py_version, matrix.shard_index) || format('Run fuzzer (Python {0})', matrix.py_version) }}
1836
steps:
1937
- name: Checkout repository
2038
uses: actions/checkout@v4

.github/workflows/run_tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ jobs:
1515
py_version:
1616
- "3.10"
1717
- "3.11"
18+
- "3.12"
1819
steps:
1920
- name: Checkout repository
2021
uses: actions/checkout@v4

.idea/inspectionProfiles/project_inspections.xml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fuzz.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import time
22

3+
from parser.astgen.astgen import AstGen
34
from parser.lexer.tokenizer import Tokenizer
45
from parser.cst.treegen import TreeGen
56
from parser.common.error import BaseParseError
@@ -29,7 +30,7 @@ def fuzz(buf):
2930
try:
3031
string = buf.decode("ascii")
3132
try:
32-
TreeGen(Tokenizer(string)).parse()
33+
AstGen(TreeGen(Tokenizer(string))).parse()
3334
except BaseParseError:
3435
pass
3536
except UnicodeDecodeError:
@@ -39,11 +40,12 @@ def fuzz(buf):
3940
if __name__ == '__main__':
4041
import argparse
4142
ap = argparse.ArgumentParser("fuzz.py", description="Runs a fuzzer for n iterations")
43+
# Use type=float as gh mobile cannot specify integers as workflow args
4244
ap.add_argument('-n', '--iterations', default=-1,
43-
type=int, help="Number of iterations to run pythonfuzz for")
45+
type=float, help="Number of iterations to run pythonfuzz for")
4446
ap.add_argument('-i', '--infinite',
4547
action='store_const', const=-1, dest='iterations')
4648
args = ap.parse_args()
4749

48-
fuzzer = Fuzzer(fuzz, dirs=['./pythonfuzz_corpus'], timeout=30, runs=args.iterations)
50+
fuzzer = Fuzzer(fuzz, dirs=['./pythonfuzz_corpus'], timeout=30, runs=int(args.iterations))
4951
fuzzer.start()

main.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import cProfile
22
import time
33

4+
from parser.astgen.astgen import AstGen
45
from util import readfile
56
from parser.cst.treegen import TreeGen
67
from parser.lexer import Tokenizer, print_tokens
@@ -14,7 +15,20 @@ def make_tree(src: str):
1415
PROFILER = True
1516

1617

17-
def run(src: str, idx: int = -1):
18+
def run(src: str, idx: int = -1, do_ast=True):
19+
node = ast_node = None
20+
ta1 = tp1 = ta0 = 0.0 # will be overwritten
21+
22+
def doit_trees():
23+
nonlocal node, tp1, ta0, ast_node, ta1
24+
treegen = TreeGen(tn)
25+
node = treegen.parse()
26+
tp1 = time.perf_counter()
27+
if do_ast:
28+
ta0 = time.perf_counter()
29+
ast_node = AstGen(treegen).parse()
30+
ta1 = time.perf_counter()
31+
1832
tn0 = time.perf_counter()
1933
tn = Tokenizer(src).tokenize()
2034
tn1 = time.perf_counter()
@@ -25,24 +39,29 @@ def run(src: str, idx: int = -1):
2539
tp0 = time.perf_counter()
2640
if PROFILER:
2741
with cProfile.Profile() as p:
28-
node = TreeGen(tn).parse()
29-
tp1 = time.perf_counter()
42+
doit_trees()
3043
p.dump_stats(f'perf_dump_{idx}.prof')
3144
else:
32-
node = TreeGen(tn).parse()
33-
tp1 = time.perf_counter()
45+
doit_trees()
3446
print('CST:')
3547
tpr_cst0 = time.perf_counter()
3648
tprint(node)
3749
tpr_cst1 = time.perf_counter()
50+
tpr_ast0 = tpr_ast1 = time.perf_counter()
51+
if do_ast:
52+
tprint(ast_node)
53+
tpr_ast1 = time.perf_counter()
3854
print(rf'Tokens done in {(tn1 - tn0) * 1000:.2f}ms')
3955
print(rf'Tokens_print done in {(tpr_tk1 - tpr_tk0) * 1000:.2f}ms')
4056
print(rf'CST done in {(tp1 - tp0) * 1000:.2f}ms')
4157
print(rf'CST_print done in {(tpr_cst1 - tpr_cst0) * 1000:.2f}ms')
58+
if do_ast:
59+
print(rf'AST done in {(ta1 - ta0) * 1000:.2f}ms')
60+
print(rf'AST_print done in {(tpr_ast1 - tpr_ast0) * 1000:.2f}ms')
4261

4362

4463
def main():
45-
run(readfile('main_example_0.st'), 0)
64+
run(readfile('main_example_0.st'), 0, do_ast=False)
4665
run(readfile('main_example_1.st'), 1)
4766

4867

parser/astgen/__init__.py

Whitespace-only changes.

parser/astgen/ast_node.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass
4+
from enum import Enum
5+
6+
from ..common import HasRegion, StrRegion
7+
8+
# Public API of this module: every AST node class plus the declaration-kind enum.
__all__ = [
    # Base / root
    "AstNode", "AstProgramNode",
    # Statements
    "VarDeclType", "AstDeclNode", "AstRepeat", "AstIf", "AstWhile",
    "AstAssign", "AstAugAssign", "AstDefine",
    # Expressions
    "AstNumber", "AstString", "AstAnyName", "AstIdent", "AstAttrName",
    "AstAttribute", "AstItem", "AstCall", "AstOp", "AstBinOp", "AstUnaryOp",
]
14+
15+
16+
@dataclass
class AstNode(HasRegion):
    """Base class for all AST nodes; carries the source region of the node.

    Subclasses are expected to supply ``name`` themselves, either as a class
    attribute or as a property.
    """
    region: StrRegion

    # Declare `name` (typed as str via the type comment) for tooling, then
    # remove it again so that a subclass that forgets to define `name` fails
    # with a clear AttributeError instead of silently yielding None.
    name = None  # type: str
    del name
21+
22+
23+
@dataclass
class AstProgramNode(AstNode):
    """Root node: holds the list of top-level statements."""
    name = 'program'

    # Statement nodes, stored as given to the constructor.
    statements: list[AstNode]
27+
28+
29+
# region ---- <Statements> ----
30+
class VarDeclType(Enum):
    """Kind of a variable declaration; each value is the keyword's spelling."""

    LET = 'let'
    GLOBAL = 'global'
33+
34+
35+
@dataclass
class AstDeclNode(AstNode):
    """Variable declaration statement, possibly declaring several variables."""
    name = 'var_decl'

    # Which kind of declaration this is (see VarDeclType).
    type: VarDeclType
    # One (identifier, value) pair per declared variable; the second element
    # may be None.
    decls: list[tuple[AstIdent, AstNode | None]]
40+
41+
42+
@dataclass
class AstRepeat(AstNode):
    """`repeat` statement: a count expression and a body of statements."""
    name = 'repeat'

    count: AstNode
    body: list[AstNode]
47+
48+
49+
@dataclass
class AstIf(AstNode):
    """`if` statement with an optional `else` block.

    There is no dedicated else-if form: `elseif` is treated as `else { if`.
    """
    name = 'if'

    cond: AstNode
    if_body: list[AstNode]
    # None means there is no else block at all; an empty list means an
    # explicit empty block (`else {}`). The two are kept apart because an
    # empty else can be used to easily add extra blocks in the scratch
    # interface.
    else_body: list[AstNode] | None = None
58+
59+
60+
@dataclass
class AstWhile(AstNode):
    """`while` statement: a condition expression and a body of statements."""
    name = 'while'

    cond: AstNode
    body: list[AstNode]
65+
66+
67+
@dataclass
class AstAssign(AstNode):
    """Plain assignment statement (`target = source`)."""
    name = '='

    target: AstNode
    source: AstNode
72+
73+
74+
@dataclass
class AstAugAssign(AstNode):
    """Augmented assignment statement; the operator text doubles as `name`."""
    # Operator text of the assignment.
    # (maybe attach a StrRegion to the location of the op??)
    op: str
    target: AstNode
    source: AstNode

    @property
    def name(self):
        """Return the operator string, so each operator gets its own name."""
        return self.op
83+
84+
85+
@dataclass
class AstDefine(AstNode):
    """`def` statement: the defined identifier, its parameters and its body."""
    name = 'def'

    ident: AstIdent
    # Each parameter is a (type, ident) pair, in that order.
    params: list[tuple[AstIdent, AstIdent]]
    body: list[AstNode]
92+
# endregion ---- </Statements> ----
93+
94+
95+
# region ---- <Expressions> ----
96+
@dataclass
class AstNumber(AstNode):
    """Numeric literal."""
    # `AstNode` deliberately leaves `name` undefined so that each node class
    # must provide it; without this, reading `.name` raises AttributeError.
    name = 'number'

    # No real point in storing the string representation
    # (could always StrRegion.resolve())
    value: float | int
100+
101+
102+
@dataclass
class AstString(AstNode):
    """String literal."""
    # `AstNode` deliberately leaves `name` undefined so that each node class
    # must provide it; without this, reading `.name` raises AttributeError.
    name = 'string'

    # The string's value with escapes, etc. already resolved.
    value: str
105+
106+
107+
@dataclass
class AstAnyName(AstNode):
    """Common base for name-like nodes; only subclasses may be instantiated.

    Raises:
        TypeError: if `AstAnyName` itself (not a subclass) is instantiated.
    """
    # The text of the name.
    id: str

    def __post_init__(self):
        # Exact-type identity check (not isinstance): subclasses must pass,
        # only the bare base class is rejected.
        if type(self) is AstAnyName:
            raise TypeError("AstAnyName must not be instantiated directly.")
114+
115+
116+
@dataclass
class AstIdent(AstAnyName):
    """Identifier node (an `AstAnyName` with node name 'ident')."""
    name = 'ident'
119+
120+
121+
@dataclass
class AstAttrName(AstAnyName):
    """Attribute-name node (an `AstAnyName` with node name 'attr')."""
    name = 'attr'
124+
125+
126+
@dataclass
class AstAttribute(AstNode):
    """Attribute access (`obj.attr`)."""
    name = '.'

    obj: AstNode
    attr: AstAttrName
131+
132+
133+
@dataclass
class AstItem(AstNode):
    """Item access: an object expression together with an index expression."""
    name = 'item'

    obj: AstNode
    index: AstNode
138+
139+
140+
@dataclass
class AstCall(AstNode):
    """Call expression: the called object and its list of arguments."""
    name = 'call'

    obj: AstNode
    args: list[AstNode]
145+
146+
147+
@dataclass
class AstOp(AstNode):
    """Shared base of the operator nodes; stores the operator text."""
    op: str
150+
151+
152+
@dataclass
class AstBinOp(AstOp):
    """Binary operation; `op` must be one of `valid_ops`.

    The node's `name` is the operator text itself.
    """
    left: AstNode
    right: AstNode

    # Not annotated, so this is a plain class attribute, not a dataclass field.
    valid_ops = [*'+-*/%', '**', '..', '||', '&&',  # ops
                 '==', '!=', '<', '>', '<=', '>='  # comparisons
                 ]  # type: list[str]

    def __post_init__(self):
        # Sanity check on construction; the message makes a failure
        # identify the offending operator.
        assert self.op in self.valid_ops, (
            f"Invalid binary operator: {self.op!r}")

    @property
    def name(self):
        """Return the operator string."""
        return self.op
167+
168+
169+
@dataclass
class AstUnaryOp(AstOp):
    """Unary operation; `op` must be one of `valid_ops`.

    The node's `name` is the operator text itself.
    """
    operand: AstNode

    # Not annotated, so this is a plain class attribute, not a dataclass field.
    valid_ops = ('+', '-', '!')

    def __post_init__(self):
        # Sanity check on construction; the message makes a failure
        # identify the offending operator.
        assert self.op in self.valid_ops, (
            f"Invalid unary operator: {self.op!r}")

    @property
    def name(self):
        """Return the operator string."""
        return self.op
181+
# endregion ---- </Expressions> ----

0 commit comments

Comments
 (0)