-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlink.hy
More file actions
268 lines (219 loc) · 7.67 KB
/
link.hy
File metadata and controls
268 lines (219 loc) · 7.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
(import dataclasses [dataclass])
(import json)
(import os)
(import struct)
(import sys)
(import hy)
(import hy.models [Expression Integer Symbol])
(require hyrule.control [defmain unless])
(require hyrule.misc [of pun])
(import hyrule.hypprint [pprint])
(import models [CompiledFunction LinkedFunction Program Unit])
;; Information about a function, necessary and sufficient to link against it
;; Normally comes from units, but can also be injected by REPL
(defclass [dataclass] ProgramFunction []
#^ int id
#^ str name
#^ int argc
#^ int retc
)
;; Additional information not included in program or program fragment
(defclass [dataclass] LinkInfo []
#^ int bc-end
#^ (of dict str ProgramFunction) function-table
#^ (of dict str int) global-table
)
(defn link-program [units
output
builtin-functions
[repl-initial-state None]
[allow-no-main False]]
(setv bc-end 0)
(setv #^ (of dict str ProgramFunction)
function-table {})
(setv #^ (of dict str int)
global-table {})
(setv functions-to-compile [])
(setv program (Program :bytecode []
:functions []
:globals []))
(when (is-not repl-initial-state None)
(setv bc-end repl-initial-state.bc-end
function-table {#** repl-initial-state.function-table}
global-table {#** repl-initial-state.global-table}))
;; link:
;; - collect functions + globals
(for [unit units]
(for [f unit.functions]
(when (in f.name function-table)
(raise (Exception f"Multiple definitions of function '{f.name}'")))
(setv function-index (len function-table))
(setv (get function-table f.name)
(ProgramFunction :id function-index
:name f.name
:argc f.argc
:retc f.retc))
(functions-to-compile.append f)
)
(for [#(g value) (unit.globals.items)]
(if (is value None)
;; If a global has no value, it has been declared akin to 'extern' in C.
;; The compiler doesn't currently permit this, but variables in the REPL use this.
(get global-table g)
(do
(when (in g global-table)
(raise (Exception f"Multiple definitions of global variable '{g}'")))
;; Note: global-table may contain additional entries coming from repl-initial-state,
;; which will not be found in program.globals (and must not be added there)
(setv global-index (len global-table))
(setv (get global-table g) global-index)
(program.globals.append value)
))
)
)
;; resolve function calls
;; resolve global var IDs
(defn error [message]
(raise (Exception f"error: {message}")))
(for [f functions-to-compile]
(defn resolve [insn]
(cond
;; call
(= (get insn 0) 'call) (do
(setv [_ name argc retc] insn)
(defn check-retc [produces]
(unless (= produces retc)
(error f"{retc} results were expected, but function '{name}' produces {produces}")))
(try
(setv f (get function-table name))
(except [KeyError]
(raise (Exception f"unresolved function {name}") :from None)))
(unless (= argc f.argc)
(error f"Function '{name}' expects {f.argc} arguments, but {argc} were passed"))
(check-retc f.retc)
['call f.id])
;; getglobal/setglobal
(in (get insn 0) #{'getglobal 'setglobal}) (do
(setv [opcode name] insn)
[opcode (get global-table name)])
True insn
))
(setv f.body (lfor insn f.body (resolve insn)))
)
;; expand jump offsets to bytes
(defn instruction-length [insn]
;; 1 byte per opcode and each operand
;; except for branches & pushconst where the operand is 2 bytes
(if (in (get insn 0) #{'jmp 'jz 'pushconst})
3
(len insn)))
(for [f functions-to-compile]
(defn resolve [i insn]
(cond
;; getglobal/setglobal
(in (get insn 0) #{'jmp 'jz}) (do
(setv [opcode dist] insn)
;; distance is sum of lengths of instructions, starting at the next one
(setv start (+ i 1))
(setv end (+ start dist))
(defn block-length [block]
(sum (gfor insn block (instruction-length insn))))
(if (>= end start)
(setv dist-bytes (block-length (cut f.body start end)))
(setv dist-bytes (- (block-length (cut f.body end start))))
)
[opcode dist-bytes])
True insn
))
(setv f.body (lfor [i insn] (enumerate f.body) (resolve i insn)))
)
;; - lay out bytecode
(for [f functions-to-compile]
(setv func-body-len
(sum (gfor insn f.body (instruction-length insn))))
(setv f* (LinkedFunction :name f.name
:argc f.argc
:num-locals f.num-locals
:bytecode-offset bc-end))
(program.functions.append f*)
(setv program.bytecode (+ program.bytecode f.body)
bc-end (+ bc-end func-body-len))
)
;(pprint all-functions)
(pprint global-table)
;; (pprint function-table)
(pprint program)
(setv OPCODE-NUMBERS {
'pushconst 0
'zero 1
'drop 2
'getglobal 3
'setglobal 4
'getlocal 5
'setlocal 6
'call 10
'ret 13
'jmp 20
'jz 21
})
(when (is-not output None)
(if allow-no-main
;; In REPL mode, definitions generate program fragments with no main function
(setv main-func-idx (try (. function-table ["main"] id) (except [KeyError] 255)))
(setv main-func-idx (. function-table ["main"] id)))
(with [f (open (+ output ".tmp") "wb")]
;; helper function for writing binary data
(defn emit [format #* args]
(f.write (struct.pack format #* args)))
;; write header
(emit "<HBBBxxx"
bc-end
(len program.functions)
(len program.globals)
main-func-idx)
;; functions
(for [func program.functions]
(emit "<BBH" func.argc func.num-locals func.bytecode-offset))
;; globals
(for [value program.globals]
(emit "<h" value))
;; bytecode
(for [[opcode #* operands] program.bytecode]
;(f.write (bytes [(get OPCODE-NUMBERS opcode) #* operands])))
(cond
(in (str opcode) builtin-functions) (do
(assert (= (len operands) 0))
(emit "B" (get (get builtin-functions (str opcode)) "opcode")))
(in opcode #{'jmp 'jz 'pushconst}) (do
;; branch instructions & pushconst have a 16-bit operand
(emit "b" (get OPCODE-NUMBERS opcode))
(emit "h" #* operands))
True (do
(for [b [(get OPCODE-NUMBERS opcode) #* operands]]
(emit "B" b)))))
)
(os.rename (+ output ".tmp") output))
(pun (LinkInfo :!bc-end :!function-table :!global-table)))
(defmain []
(import argparse [ArgumentParser])
(setv parser (ArgumentParser))
(parser.add-argument "inputs" :nargs "+")
(parser.add-argument "-o" :dest "output" :required True)
(setv args (parser.parse-args))
(with [f (open "builtins.json")]
(setv builtin-functions (json.load f))
)
(setv units [])
(for [path args.inputs]
(with [f (open path)]
(setv [form] (hy.read-many f))
(setv unit (Unit.from-form form))
;; (pprint unit)
(units.append unit)
)
)
(link-program units
:output args.output
:builtin-functions builtin-functions
)
)