Skip to content

Commit 93ad8d7

Browse files
make a 2-pass assembler to get layout/labels right, fixes #26, fixes #22
1 parent e5b9687 commit 93ad8d7

File tree

5 files changed

+176
-29
lines changed

5 files changed

+176
-29
lines changed

demo.S

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,16 @@ textstart: ld r0, r1, 0 # a comment!
1818
rsh r0, r1, 42
1919
move r0, r1
2020
move r0, 42
21+
move r0, textstart # moves abs addr of textstart to r0
2122
stage_rst
2223
stage_inc 42
2324
stage_dec 23
2425

25-
# jumping to labels not supported yet
26-
jumpr -1, 42, lt
27-
jumpr +1, 23, GE
28-
jump 0
26+
rel_b: jumpr -1, 42, lt
27+
jumpr rel_b, 42, LT
28+
jumpr rel_f, 23, ge
29+
rel_f: jumpr +1, 23, GE
30+
jump textstart
2931
jump 0, eq
3032
jump 0, OV
3133
jump r0

esp32_ulp/assemble.py

Lines changed: 108 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,97 @@
77

88
TEXT, DATA, BSS = 'text', 'data', 'bss'
99

10+
REL, ABS = 0, 1
11+
12+
13+
class SymbolTable:
    """Symbol table used by the assembler and the opcode encoders.

    Every symbol maps to an entry tuple ``(stype, section, value)``:

    * ``stype == REL``: *value* is an offset inside *section*; the absolute
      address is the base of that section plus the offset.
    * ``stype == ABS``: *value* already is the final value.

    While pass 1 is active, unknown symbols and unknown section bases resolve
    to dummy values, so that forward references do not abort the pass that
    only exists to collect the symbols.
    """

    def __init__(self, symbols, bases):
        """Create a table from a ``symbols`` dict and a section ``bases`` dict."""
        self._symbols = symbols
        self._bases = bases
        self._pass = None
        # Location of the instruction currently being assembled; initialised
        # here so get_from() is usable before the first set_from() call.
        self._from_section = None
        self._from_offset = None

    def set_pass(self, _pass):
        """Remember which assembler pass is active (1 enables dummy lookups)."""
        self._pass = _pass

    def set_bases(self, bases):
        """Replace the mapping of section name -> base address."""
        self._bases = bases

    def set_from(self, from_section, from_offset):
        """Record the location that relative references are computed from."""
        self._from_section, self._from_offset = from_section, from_offset

    def get_from(self):
        """Return the ``(section, offset)`` pair stored by set_from()."""
        return self._from_section, self._from_offset

    def set_sym(self, symbol, stype, section, value):
        """Define *symbol*.

        Re-defining a symbol with an identical entry is allowed (this happens
        when pass 2 sees the same labels again).

        Raises:
            Exception: if *symbol* already exists with a different entry.
        """
        entry = (stype, section, value)
        if symbol in self._symbols and entry != self._symbols[symbol]:
            raise Exception('redefining symbol %s with different value %r -> %r.' % (symbol, self._symbols[symbol], entry))
        self._symbols[symbol] = entry

    def has_sym(self, symbol):
        """Return True if *symbol* is defined."""
        return symbol in self._symbols

    def get_sym(self, symbol):
        """Return the entry of *symbol*.

        In pass 1 an unknown symbol yields a dummy entry instead of failing.

        Raises:
            KeyError: if *symbol* is unknown and pass 1 is not active.
        """
        try:
            entry = self._symbols[symbol]
        except KeyError:
            if self._pass == 1:
                entry = (REL, TEXT, 0)  # for a dummy, this is good enough
            else:
                raise
        return entry

    def dump(self):
        """Print all symbols with their entries, sorted by symbol name."""
        for symbol, entry in sorted(self._symbols.items()):
            print(symbol, entry)

    def to_abs_addr(self, section, offset):
        """Return the base address of *section* plus *offset*.

        In pass 1 an unknown section base is treated as 0.

        Raises:
            KeyError: if the base of *section* is unknown and pass 1 is not
                active.
        """
        try:
            base = self._bases[section]
        except KeyError:
            if self._pass == 1:
                base = 0  # for a dummy this is good enough
            else:
                raise
        return base + offset

    def _lookup(self, symbol):
        """Return the entry tuple for a symbol name or an entry tuple."""
        if isinstance(symbol, str):
            return self.get_sym(symbol)
        if isinstance(symbol, tuple):
            return symbol
        raise TypeError('expected symbol name or entry tuple, got %r' % (symbol,))

    def _entry_to_abs(self, stype, section, value):
        """Return the absolute value described by one symbol entry."""
        if stype == REL:
            return self.to_abs_addr(section, value)
        if stype == ABS:
            return value
        raise TypeError(stype)

    def resolve_absolute(self, symbol):
        """Return the absolute value of *symbol* (a name or an entry tuple).

        Raises:
            TypeError: if *symbol* is neither str nor tuple, or its entry has
                an unknown symbol type.
        """
        stype, section, value = self._lookup(symbol)
        return self._entry_to_abs(stype, section, value)

    def resolve_relative(self, symbol):
        """Return the distance from the current location to *symbol*.

        The current location is the one stored by set_from().

        Raises:
            TypeError: if *symbol* is neither str nor tuple, or its entry has
                an unknown symbol type.
        """
        sym_type, sym_section, sym_value = self._lookup(symbol)
        sym_addr = self._entry_to_abs(sym_type, sym_section, sym_value)
        from_addr = self.to_abs_addr(self._from_section, self._from_offset)
        return sym_addr - from_addr
90+
1091

1192
class Assembler:
1293

13-
def __init__(self):
14-
self.symbols = {}
94+
def __init__(self, symbols=None, bases=None):
    """Create an assembler with its symbol table.

    symbols: optional dict of already known symbols (a new dict if None).
    bases: optional dict of section base addresses (a new dict if None).
    """
    # Test against None instead of truthiness, so that an empty dict handed
    # in by the caller is used (and filled) rather than silently replaced.
    if symbols is None:
        symbols = {}
    if bases is None:
        bases = {}
    self.symbols = SymbolTable(symbols, bases)
    opcodes.symbols = self.symbols  # XXX dirty hack
97+
98+
def init(self, a_pass):
99+
self.a_pass = a_pass
100+
self.symbols.set_pass(a_pass)
15101
self.sections = dict(text=[], data=[])
16102
self.offsets = dict(text=0, data=0, bss=0)
17103
self.section = TEXT
@@ -78,10 +164,18 @@ def finalize_sections(self):
78164
if s is not BSS:
79165
self.sections[s].append(fill)
80166

167+
def compute_bases(self):
    """Return a dict mapping each section name to its base address.

    Sections are placed one after another starting at address 0; each
    section occupies ``self.offsets[section] // 4`` address units.
    """
    # lay out sections in this order:
    layout_order = (TEXT, DATA, BSS)  # TODO: more flexibility for custom sections
    section_bases = {}
    next_base = 0
    for name in layout_order:
        section_bases[name] = next_base
        size_in_words = self.offsets[name] // 4  # 32bit word addresses
        next_base = next_base + size_in_words
    return section_bases
175+
81176
def dump(self):
82177
print("Symbols:")
83-
for label, section_offset in sorted(self.symbols.items()):
84-
print(label, section_offset)
178+
self.symbols.dump()
85179
print("%s section:" % TEXT)
86180
for t in self.sections[TEXT]:
87181
print("%08x" % int.from_bytes(t, 'little'))
@@ -134,12 +228,11 @@ def d_word(self, *args):
134228
def d_long(self, *args):
    """Append all *args* via append_data() with a first argument of 4.

    NOTE(review): presumably the handler for the ``.long`` directive, with 4
    being the item size in bytes - confirm against append_data().
    """
    item_size = 4
    self.append_data(item_size, args)
136230

137-
def assemble(self, lines):
231+
def assembler_pass(self, lines):
138232
for label, opcode, args in self.parse(lines):
233+
self.symbols.set_from(self.section, self.offsets[self.section] // 4)
139234
if label is not None:
140-
if label in self.symbols:
141-
raise Exception('label %s is already defined.' % label)
142-
self.symbols[label] = (self.section, self.offsets[self.section] // 4)
235+
self.symbols.set_sym(label, REL, *self.symbols.get_from())
143236
if opcode is not None:
144237
if opcode[0] == '.':
145238
# assembler directive
@@ -159,3 +252,10 @@ def assemble(self, lines):
159252
raise Exception('Unknown opcode or directive: %s' % opcode)
160253
self.finalize_sections()
161254

255+
def assemble(self, lines):
    """Assemble *lines* with two passes over the same input.

    Pass 1 is only there to get the symbol table right; once it is done the
    section bases are computed and stored in the symbol table. Pass 2 is the
    real assembler pass, run with all symbols and bases known.
    """
    for pass_number in (1, 2):
        self.init(pass_number)
        self.assembler_pass(lines)
        if pass_number == 1:
            # layout is known only after the first pass has finished
            self.symbols.set_bases(self.compute_bases())
261+

esp32_ulp/opcodes.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
from .soc import *
99

10+
# XXX dirty hack: use a global for the symbol table
11+
symbols = None
12+
1013
# Opcodes, Sub-Opcodes, Modes, ...
1114

1215
OPCODE_WR_REG = 1
@@ -250,7 +253,7 @@ def make_ins(layout):
250253

251254
# assembler opcode definitions
252255

253-
REG, IMM, COND = 0, 1, 2
256+
REG, IMM, COND, SYM = 0, 1, 2, 3
254257
ARG = namedtuple('ARG', ('type', 'value', 'raw'))
255258

256259

@@ -276,25 +279,44 @@ def arg_qualify(arg):
276279
return ARG(IMM, int(arg), arg)
277280
except ValueError:
278281
pass
279-
raise TypeError('arg_qualify: unsupported arg type: %s' % arg)
282+
entry = symbols.get_sym(arg)
283+
return ARG(SYM, entry, arg)
280284

281285

282286
def get_reg(arg):
    """Return the value of a register argument.

    *arg* is either a raw string (qualified via arg_qualify() first) or an
    already qualified ARG.

    Raises:
        TypeError: if the argument is not of type REG.
    """
    qualified = arg_qualify(arg) if isinstance(arg, str) else arg
    if qualified.type != REG:
        raise TypeError('wanted: register, got: %s' % qualified.raw)
    return qualified.value
287292

288293

289294
def get_imm(arg):
    """Return the integer value of an immediate or symbol argument.

    *arg* is either a raw string (qualified via arg_qualify() first) or an
    already qualified ARG. An IMM yields its value; a SYM is resolved to an
    absolute value through the global symbol table.

    Raises:
        TypeError: if the argument is neither IMM nor SYM.
    """
    qualified = arg_qualify(arg) if isinstance(arg, str) else arg
    if qualified.type == IMM:
        return qualified.value
    if qualified.type != SYM:
        raise TypeError('wanted: immediate, got: %s' % qualified.raw)
    return symbols.resolve_absolute(qualified.value)


# absolute addresses are resolved exactly like immediates
get_abs = get_imm
305+
306+
307+
def get_rel(arg):
    """Return the relative offset given by an immediate or symbol argument.

    *arg* is either a raw string (qualified via arg_qualify() first) or an
    already qualified ARG. An IMM yields its value unchanged; a SYM is
    resolved relative to the current location through the global symbol
    table.

    Raises:
        TypeError: if the argument is neither IMM nor SYM.
    """
    qualified = arg_qualify(arg) if isinstance(arg, str) else arg
    if qualified.type == IMM:
        return qualified.value
    if qualified.type != SYM:
        raise TypeError('wanted: immediate, got: %s' % qualified.raw)
    return symbols.resolve_relative(qualified.value)
294315

295316

296317
def get_cond(arg):
    """Return the value of a condition argument.

    *arg* is either a raw string (qualified via arg_qualify() first) or an
    already qualified ARG.

    Raises:
        TypeError: if the argument is not of type COND.
    """
    qualified = arg_qualify(arg) if isinstance(arg, str) else arg
    if qualified.type != COND:
        raise TypeError('wanted: condition, got: %s' % qualified.raw)
    return qualified.value
@@ -429,10 +451,10 @@ def i_move(reg_dest, reg_imm_src):
429451
_alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
430452
_alu_reg.opcode = OPCODE_ALU
431453
return _alu_reg.all
432-
if src.type == IMM:
454+
if src.type == IMM or src.type == SYM:
433455
_alu_imm.dreg = dest
434456
_alu_imm.sreg = 0
435-
_alu_imm.imm = src.value
457+
_alu_imm.imm = get_abs(src)
436458
_alu_imm.unused = 0
437459
_alu_imm.sel = ALU_SEL_MOV
438460
_alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
@@ -457,10 +479,10 @@ def _alu3(reg_dest, reg_src1, reg_imm_src2, alu_sel):
457479
_alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
458480
_alu_reg.opcode = OPCODE_ALU
459481
return _alu_reg.all
460-
if src2.type == IMM:
482+
if src2.type == IMM or src2.type == SYM:
461483
_alu_imm.dreg = dest
462484
_alu_imm.sreg = src1
463-
_alu_imm.imm = src2.value
485+
_alu_imm.imm = get_abs(src2)
464486
_alu_imm.unused = 0
465487
_alu_imm.sel = alu_sel
466488
_alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
@@ -546,9 +568,9 @@ def i_jump(target, condition='--'):
546568
jump_type = BX_JUMP_TYPE_DIRECT
547569
else:
548570
raise ValueError("invalid flags condition")
549-
if target.type == IMM:
571+
if target.type == IMM or target.type == SYM:
550572
_bx.dreg = 0
551-
_bx.addr = target.value
573+
_bx.addr = get_abs(target)
552574
_bx.unused = 0
553575
_bx.reg = 0
554576
_bx.type = jump_type
@@ -568,7 +590,7 @@ def i_jump(target, condition='--'):
568590

569591

570592
def i_jumpr(offset, threshold, condition):
571-
offset = get_imm(offset)
593+
offset = get_rel(offset)
572594
threshold = get_imm(threshold)
573595
condition = get_cond(condition)
574596
if condition == 'lt':
@@ -587,7 +609,7 @@ def i_jumpr(offset, threshold, condition):
587609

588610

589611
def i_jumps(offset, threshold, condition):
590-
offset = get_imm(offset)
612+
offset = get_rel(offset)
591613
threshold = get_imm(threshold)
592614
condition = get_cond(condition)
593615
if condition == 'lt':

tests/assemble.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from esp32_ulp.assemble import Assembler, TEXT, DATA, BSS
1+
from esp32_ulp.assemble import Assembler, TEXT, DATA, BSS, REL, ABS
22

33
src = """\
44
@@ -31,9 +31,10 @@ def test_parse():
3131
def test_assemble():
3232
a = Assembler()
3333
a.assemble(src)
34-
assert {'start', 'end'} <= set(a.symbols)
35-
assert a.symbols['start'] == (TEXT, 0)
36-
assert a.symbols['end'] == (TEXT, 4)
34+
assert a.symbols.has_sym('start')
35+
assert a.symbols.has_sym('end')
36+
assert a.symbols.get_sym('start') == (REL, TEXT, 0)
37+
assert a.symbols.get_sym('end') == (REL, TEXT, 4)
3738
assert len(b''.join(a.sections[TEXT])) == 16 # 4 instructions * 4B
3839
assert len(a.sections[DATA]) == 0
3940
assert a.offsets[BSS] == 0

tests/compat/symbols.S

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
.text
2+
3+
start: move r0, data0
4+
move r1, data1
5+
6+
# count from 0 .. 42 in stage register
7+
stage_rst
8+
loop1: stage_inc 1
9+
jumps loop1, 42, lt
10+
11+
# count from 0 .. 42 in r0
12+
move r0, 0
13+
loop2: add r0, r0, 1
14+
jumpr loop2, 42, lt
15+
16+
end: jump start
17+
18+
.data
19+
20+
data0: .long 1000
21+
data1: .long 1001
22+

0 commit comments

Comments
 (0)