Skip to content

Commit 22f9ee4

Browse files
Merge pull request #65 from MarcellPerger1/add-typechecking
Add name resolution and scopes
2 parents e3cb48a + 784a729 commit 22f9ee4

File tree

12 files changed

+4550
-12
lines changed

12 files changed

+4550
-12
lines changed

main_example_2.st

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
global false = 0;
2+
global true = 1; // temporary
3+
/*intrinsic*/def len(/*list*/val a) {}
4+
/*intrinsic*/def delitem(/*list*/val a, number idx) {}
5+
6+
global[] psHeap = [];
7+
global[] _psHeap_freelist = [];
8+
9+
// Finalization hook for the heap word at `ptr`; called by PsHeap_DeleteWord
// before the word is freed. The body is currently empty (placeholder).
def _PsHeap_FinalizeObj(number ptr) {
10+
// Dispatch finalizer here, decref components, etc.
11+
}
12+
13+
global __result__; // TODO: some syntax sugar for return?
14+
// Allocate one heap word and leave its index in the global `__result__`.
// A previously freed slot is reused when the free-list is non-empty;
// otherwise `psHeap` grows by one (empty-string) word.
// When COUNT_REFS is on, the word starts with a refcount of 1.
def PsHeap_AllocWord() {
    if len(_psHeap_freelist) > 0 {
        // Lists are indexed from 1 everywhere else in this file
        // (`stack[len(stack)]` is the top, `len(psHeap)` is the newest word),
        // so the first free-list entry lives at index 1, not 0.
        __result__ = _psHeap_freelist[1];
        delitem(_psHeap_freelist, 1);
        if COUNT_REFS {
            refStack[__result__] = 1;
        }
    } else {
        // TODO: figure out list syntax
        psHeap.append('');
        // The word just appended is the last one, i.e. index == length
        __result__ = len(psHeap);
        if COUNT_REFS {
            refStack.append(1);
        }
    }
}
30+
/**
 * Return the heap word at `ptr` to the free-list.
 * Note: Doesn't finalize! (PsHeap_DeleteWord does that first.)
 */
def PsHeap_FreeWord(number ptr) {
    if COUNT_REFS {
        // Freed words carry a refcount of 0
        refStack[ptr] = 0;
    }
    // Wipe the contents so the next person can't read what was here
    psHeap[ptr] = '';
    _psHeap_freelist.append(ptr);
}
38+
// Fully dispose of the heap word at `ptr`: run its finalizer hook first,
// then hand the slot back to the free-list.
def PsHeap_DeleteWord(number ptr) {
39+
_PsHeap_FinalizeObj(ptr);
40+
PsHeap_FreeWord(ptr);
41+
}
42+
43+
global[] refStack = [];
44+
// Drop one reference to the heap word at `ptr`, deleting the word when its
// count reaches zero.
def decref(number ptr) {
    // negative refcount indicates immortal objects - those are left untouched
    if refStack[ptr] >= 0 {
        refStack[ptr] = refStack[ptr] - 1;
        if refStack[ptr] == 0 {
            PsHeap_DeleteWord(ptr);
        }
    }
}
52+
// Add one reference to the heap word at `ptr`.
def incref(number ptr) {
    // negative refcount indicates immortal objects - those are left untouched
    if refStack[ptr] >= 0 {
        refStack[ptr] = refStack[ptr] + 1;
    }
}
57+
// implement GC that can do cycles
58+
59+
60+
global[] stack = [];
61+
global COUNT_REFS = false;
62+
63+
// opcodes from python version 3.8:
64+
65+
// opcodes from: https://unpyc.sourceforge.net/Opcodes.html
66+
// and https://harrisonmorgan.dev/2020/04/13/learning-cpython-bytecode-instructions/
67+
// also https://docs.python.org/3.8/library/dis.html#opcode-NOP for description
68+
// and https://github.com/python/cpython/blob/3.8/Lib/opcode.py#L58 for numbers
69+
70+
// NOP opcode: intentionally does nothing.
def NOP() {}
71+
72+
// POP_TOP opcode: discard the top-of-stack item
// (releasing its reference first when COUNT_REFS is on).
def POP_TOP() {
    // The top of the stack is the last list element
    let tos_idx = len(stack);
    if COUNT_REFS {
        decref(stack[tos_idx]);
    }
    delitem(stack, tos_idx);
}
78+
79+
// ROT_TWO opcode: swap the two top-most stack items.
def ROT_TWO() {
    let top = len(stack);
    let second = top - 1;
    let old_top = stack[top];
    stack[top] = stack[second];
    stack[second] = old_top;
}
85+
86+
// ROT_THREE opcode: lift the 2nd and 3rd items up one position and
// move the old top down to the 3rd position, i.e.
//   top   = top-1
//   top-1 = top-2
//   top-2 = top
def ROT_THREE() {
    let top = len(stack);
    let second = top - 1;
    let third = top - 2;
    let old_top = stack[top];
    stack[top] = stack[second];
    stack[second] = stack[third];
    stack[third] = old_top;
}
97+
98+
// DUP_TOP opcode: push a second copy of the top-of-stack item
// (counting the extra reference when COUNT_REFS is on).
def DUP_TOP() {
    let top_item = stack[len(stack)];
    if COUNT_REFS {
        incref(top_item);
    }
    stack.append(top_item);
}
105+
106+
// DUP_TOP_TWO opcode: duplicate the two top-most items, keeping their order
// (the original top ends up on top again).
def DUP_TOP_TWO() {
    let top = len(stack);
    let first = stack[top];
    let second = stack[top - 1];
    // Push the lower item first so that `first` stays at the top
    stack.append(second);
    stack.append(first);
    if COUNT_REFS {
        incref(first);
        incref(second);
    }
}
117+
118+
// ROT_FOUR opcode: lift the 2nd, 3rd and 4th items up one position and
// move the old top down to the 4th position, i.e.
//   tos3 = tos
//   tos  = tos1
//   tos1 = tos2
//   tos2 = tos3
def ROT_FOUR() {
    let top = len(stack);
    let second = top - 1;
    let third = top - 2;
    let fourth = top - 3;
    let old_top = stack[top];
    stack[top] = stack[second];
    stack[second] = stack[third];
    stack[third] = stack[fourth];
    stack[fourth] = old_top;
}

parser/typecheck/__init__.py

Whitespace-only changes.

parser/typecheck/typecheck.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass, field
4+
from typing import Callable, TypeVar
5+
6+
from parser.astgen.ast_node import (
7+
AstNode, walk_ast, WalkableT, WalkerCallType, AstIdent, AstDeclNode,
8+
AstDefine, VarDeclType, VarDeclScope)
9+
from parser.astgen.astgen import AstGen
10+
from parser.common import BaseLocatedError, StrRegion
11+
from util import flatten_force
12+
13+
WT = TypeVar('WT', bound=WalkableT)
14+
VT = TypeVar('VT')
15+
16+
17+
class FilteredWalker:
    """Walker callback that dispatches to handlers registered per node type.

    Handlers can be registered for the "enter" phase, the "exit" phase, or
    both. An instance is itself callable with ``(obj, call_type)``, which is
    how it is handed to ``walk_ast``. Handlers registered for a base class
    also fire for its subclasses (lookup follows the MRO).
    """

    def __init__(self):
        # type -> handlers taking just the node
        self.enter_cbs: dict[type[WT] | type, list[Callable[[WT], bool | None]]] = {}
        self.exit_cbs: dict[type[WT] | type, list[Callable[[WT], bool | None]]] = {}
        # type -> handlers taking the node and the call type
        self.both_cbs: dict[type[WT] | type, list[
            Callable[[WT, WalkerCallType], bool | None]]] = {}

    @staticmethod
    def _add(table: dict, t: type, fn) -> None:
        """Append ``fn`` to the handler list for ``t``, creating it if needed."""
        handlers = table.setdefault(t, [])
        handlers.append(fn)

    def register_both(self, t: type[WT], fn: Callable[[WT, WalkerCallType], bool | None]):
        """Register ``fn(node, call_type)`` for both phases; returns ``self``."""
        self._add(self.both_cbs, t, fn)
        return self

    def register_enter(self, t: type[WT], fn: Callable[[WT], bool | None]):
        """Register ``fn(node)`` for the enter phase; returns ``self``."""
        self._add(self.enter_cbs, t, fn)
        return self

    def register_exit(self, t: type[WT], fn: Callable[[WT], bool | None]):
        """Register ``fn(node)`` for the exit phase; returns ``self``."""
        self._add(self.exit_cbs, t, fn)
        return self

    def __call__(self, o: WalkableT, call_type: WalkerCallType):
        """Run every matching handler and return the last truthy result (or None)."""
        if call_type == WalkerCallType.PRE:
            phase_cbs = self.enter_cbs
        else:
            phase_cbs = self.exit_cbs
        node_type = type(o)
        outcome = None
        # Call more specific ones first
        for handler in self._get_funcs(phase_cbs, node_type):
            outcome = handler(o) or outcome
        for handler in self._get_funcs(self.both_cbs, node_type):
            outcome = handler(o, call_type) or outcome
        return outcome

    @classmethod
    def _get_funcs(cls, mapping: dict[type[WT] | type, list[VT]], tp: type[WT]) -> list[VT]:
        """Also looks at superclasses/MRO"""
        per_class = (mapping.get(klass, []) for klass in tp.mro())
        return flatten_force(per_class)
50+
51+
52+
@dataclass
class TypeInfo:
    """Base class of all type descriptors; only subclasses may be instantiated."""

    def __post_init__(self):
        is_bare_base = type(self) is TypeInfo
        assert not is_bare_base, "Cannot instantiate TypeInfo directly,use a subclass"


@dataclass
class ValType(TypeInfo):
    """Type marker for plain values (used for variables and non-bool parameters)."""


@dataclass
class BoolType(TypeInfo):
    """Type marker for booleans."""


@dataclass
class ListType(TypeInfo):
    """Type marker for lists."""


@dataclass
class VoidType(TypeInfo):
    """The ``void`` type - represents 'there must not be a value here'.

    For example, this is the return type of functions that don't return
    anything (e.g. all regular user-defined scratch functions).
    """


@dataclass
class FunctionType(TypeInfo):
    """Type of a function: its parameter types (in order) and its return type."""

    arg_types: list[TypeInfo]
    ret_type: TypeInfo
86+
87+
88+
@dataclass
89+
class NameInfo:
90+
decl_scope: Scope
91+
ident: str
92+
tp_info: TypeInfo
93+
# node: AstNode # <-- Why do we need this?
94+
is_param: bool = field(default=False, kw_only=True)
95+
96+
97+
@dataclass
class FuncInfo(NameInfo):
    """``NameInfo`` for a function: adds its parameters and its body scope."""

    tp_info: FunctionType  # Overrides types (doesn't change order)
    params_info: list[ParamInfo]
    # Can't just pass default_factory=Scope as it is only defined below
    subscope: Scope = field(default_factory=lambda: Scope())

    @classmethod
    def from_param_info(
            cls, decl_scope: Scope, ident: str, params_info: list[ParamInfo],
            ret_type: TypeInfo, subscope: Scope = None):
        """Build a ``FuncInfo``, deriving its ``FunctionType`` from the parameters.

        A fresh ``Scope`` is created when ``subscope`` is not supplied.
        """
        if not subscope:
            subscope = Scope()
        param_types = [param.tp for param in params_info]
        signature = FunctionType(param_types, ret_type)
        return cls(decl_scope, ident, signature, params_info, subscope)
111+
112+
113+
@dataclass
class ParamInfo:
    """Name and type of a single function parameter."""

    # Parameter name
    name: str
    # Parameter type
    tp: TypeInfo
117+
118+
119+
@dataclass
class Scope:
    """Names declared in, and names referenced from, one lexical scope."""

    # Names declared directly in this scope
    declared: dict[str, NameInfo] = field(default_factory=dict)
    # Add references to outer scopes' variables that we use.
    # (so type codegen/type-checker knows what each AstIdent refers to)
    used: dict[str, NameInfo] = field(default_factory=dict)
126+
127+
class NameResolutionError(BaseLocatedError):
    """Located error raised by ``NameResolver`` when name resolution fails."""
129+
130+
131+
# The reason `let` isn't used is because we don't want to imply similarity
132+
# between parameters as local variables (where none exists in Scratch).
133+
# Also, we might want to use `let` later as a modifier to bind it to
134+
# an actual local var.
135+
# Don't need to `sys.intern` these manually as Python automatically does
136+
# this for literals.
137+
# Type keywords accepted in front of a function parameter
PARAM_TYPES = {
    'number',
    'string',
    'val',
    'bool',
}
138+
139+
140+
# Variables:
141+
# - We can prevent usages before the variable is declared in 2 ways:
142+
# - Based on time: very sensible, like JS, but requires too many runtime features
143+
# - Based on location: somewhat makes sense except for inner functions -
144+
# they may be called later so should be able to access any variables.
145+
# - Or we can just ignore it (e.g. `var` in JS) and pretend everything was
146+
# declared at the top (but not assigned to - i.e. hoist `var foo;` to top).
147+
# To minimise accidental errors, option 1.2 is best
148+
# (errors shouldn't pass silently, and that method requires no special runtime)
149+
class NameResolver:
    """Resolve every identifier of a parsed program to its declaration.

    Walks the AST produced by ``astgen`` and builds one ``Scope`` for the
    top level plus one sub-scope per function definition. Each scope
    records the names it declares (``Scope.declared``) and the names it
    references (``Scope.used``).

    Resolution failures (undefined name, re-declaration, unknown parameter
    type, duplicate parameter) raise ``NameResolutionError``.
    """

    def __init__(self, astgen: AstGen):
        self.astgen = astgen
        self.src = self.astgen.src
        # Set by the first ``run()``
        self.top_scope: Scope | None = None

    def _init(self):
        """Parse the source and create the (empty) top-level scope."""
        self.ast = self.astgen.parse()
        self.top_scope = Scope()

    def run(self):
        """Resolve the whole program and return its top-level ``Scope``.

        The result is cached: later calls return the same scope.
        """
        if self.top_scope is not None:
            return self.top_scope
        self._init()
        self.run_on_new_scope(self.ast.statements, curr_scope=self.top_scope)
        return self.top_scope

    def run_on_new_scope(self, block: list[AstNode], parent_scopes: list[Scope] = None,
                         curr_scope: Scope = None):
        """Resolve names in ``block``, treating it as the body of one scope.

        :param block: the statements making up the scope's body
        :param parent_scopes: enclosing scopes, outermost first. ``curr_scope``
          is pushed onto this list while resolving and popped again before
          returning (even if resolution fails).
        :param curr_scope: scope to fill in; a fresh one is created if omitted
        :returns: the filled-in ``curr_scope``
        :raises NameResolutionError: if a name cannot be resolved or declared
        """
        def enter_ident(n: AstIdent):
            # Innermost scope first so that inner declarations shadow outer ones
            for s in reversed(scope_stack):
                if info := s.declared.get(n.id):
                    curr_scope.used[n.id] = info
                    return
            raise self.err(f"Name '{n.id}' is not defined", n.region)

        def enter_decl(n: AstDeclNode):
            # Need semi-special logic here to prevent the walker from
            # visiting the AstIdent that is currently being declared.
            AstNode.walk_obj(n.value, walker)  # Don't walk `n.ident`
            # Do this after walking (that is when the name is bound)
            ident = n.ident.id
            target_scope = curr_scope if n.scope == VarDeclScope.LET else self.top_scope
            if ident in target_scope.declared:
                raise self.err("Variable already declared", n.region)
            target_scope.declared[ident] = NameInfo(target_scope, ident, (
                ValType() if n.type == VarDeclType.VARIABLE else ListType()))
            return True  # Children were already walked above

        def enter_fn_decl(fn: AstDefine):
            ident = fn.ident.id
            if ident in curr_scope.declared:
                raise self.err("Function already declared", fn.ident.region)
            subscope = Scope()
            params: list[ParamInfo] = []
            for tp_node, param in fn.params:
                if tp_node.id not in PARAM_TYPES:
                    raise self.err("Unknown parameter type", tp_node.region)
                if param.id in subscope.declared:
                    raise self.err("There is already a parameter of this name", param.region)
                # Decide based on the declared *type* keyword, not on the
                # parameter's own name.
                tp = BoolType() if tp_node.id == 'bool' else ValType()
                subscope.declared[param.id] = NameInfo(subscope, param.id, tp, is_param=True)
                params.append(ParamInfo(param.id, tp))
            curr_scope.declared[ident] = info = FuncInfo.from_param_info(
                curr_scope, ident, params,
                ret_type=VoidType(), subscope=subscope)
            inner_funcs.append((info, fn))  # Store funcs for later walking
            # Skip walking body, only walk inner after collecting all declared
            # variables in outer scope so function can use all variables
            # declared in outer scope - even the ones declared below it)
            return True

        if curr_scope is None:
            curr_scope = Scope()
        scope_stack = [] if parent_scopes is None else parent_scopes
        scope_stack.append(curr_scope)
        try:
            inner_funcs: list[tuple[FuncInfo, AstDefine]] = []
            # Walk self
            walker = (FilteredWalker()
                      .register_enter(AstIdent, enter_ident)
                      .register_enter(AstDeclNode, enter_decl)
                      .register_enter(AstDefine, enter_fn_decl))
            walk_ast(block, walker)
            # Walk sub-functions
            for fn_info, fn_decl in inner_funcs:
                fn_info.subscope = self.run_on_new_scope(
                    fn_decl.body, scope_stack, fn_info.subscope)
        finally:
            # Always restore the (possibly caller-owned) stack of parent scopes
            scope_stack.pop()
        return curr_scope

    def err(self, msg: str, region: StrRegion):
        """Create (not raise) a ``NameResolutionError`` located at ``region``."""
        return NameResolutionError(msg, region, self.src)

test/common/common.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
from parser.cst.cstgen import CstGen, LocatedCstError
1515
from parser.lexer import Tokenizer
1616
from parser.lexer.tokens import Token, OpToken
17+
from parser.typecheck.typecheck import Scope, NameResolver, NameResolutionError
1718
from test.common.snapshottest import SnapshotTestCase
1819
from test.common.utils import TestCaseUtils
20+
from util.pformat import pformat
1921

2022

2123
def _strict_boundary_kwargs():
@@ -53,6 +55,7 @@ def setUpClass(cls) -> None:
5355
cls.format_dispatch.setdefault(Leaf, cls._tree_format)
5456
cls.format_dispatch.setdefault(Node, cls._tree_format)
5557
cls.format_dispatch.setdefault(AstNode, cls._tree_format)
58+
cls.format_dispatch.setdefault(Scope, pformat)
5659
super().setUpClass()
5760

5861
@classmethod
@@ -124,3 +127,13 @@ def assertFailsGracefullyAST(self, src: str):
124127
with self.assertRaises(LocatedAstError) as ctx:
125128
a.parse()
126129
return ctx.exception
130+
131+
# noinspection PyMethodMayBeStatic
132+
def getNameResolver(self, src: str):
    """Build a ``NameResolver`` on top of the full tokenizer -> CST -> AST chain for ``src``."""
    tokenizer = Tokenizer(src)
    cst_gen = CstGen(tokenizer)
    ast_gen = AstGen(cst_gen)
    return NameResolver(ast_gen)
134+
135+
def assertNameResolveError(self, src: str):
    """Assert that resolving names in ``src`` raises ``NameResolutionError``.

    Returns the raised exception so callers can inspect it further.
    """
    resolver = self.getNameResolver(src)
    with self.assertRaises(NameResolutionError) as caught:
        resolver.run()
    return caught.exception

0 commit comments

Comments
 (0)