Skip to content

Commit 44e16ce

Browse files
Merge pull request #30 from ThomasWaldmann/twopass-assembly
make a 2-pass assembler to get layout/labels right
2 parents 10ca81e + 36e2632 commit 44e16ce

File tree

7 files changed

+212
-40
lines changed

7 files changed

+212
-40
lines changed

demo.S

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
.text
44

5+
.set constant42, 42
6+
57
textstart: ld r0, r1, 0 # a comment!
68
st r0, r1, 0 // another comment!
79
add r0, r1, r2
@@ -18,14 +20,17 @@ textstart: ld r0, r1, 0 # a comment!
1820
rsh r0, r1, 42
1921
move r0, r1
2022
move r0, 42
23+
move r0, textstart # moves abs addr of textstart to r0
24+
move r0, constant42
2125
stage_rst
2226
stage_inc 42
2327
stage_dec 23
2428

25-
# jumping to labels not supported yet
26-
jumpr -1, 42, lt
27-
jumpr +1, 23, GE
28-
jump 0
29+
rel_b: jumpr -1, 42, lt
30+
jumpr rel_b, 42, LT
31+
jumpr rel_f, 23, ge
32+
rel_f: jumpr +1, 23, GE
33+
jump textstart
2934
jump 0, eq
3035
jump 0, OV
3136
jump r0

esp32_ulp/assemble.py

Lines changed: 131 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,97 @@
77

88
TEXT, DATA, BSS = 'text', 'data', 'bss'
99

10+
REL, ABS = 0, 1
11+
12+
13+
class SymbolTable:
    """Symbol table used by the assembler and the opcode encoders.

    Every entry maps a symbol name to a ``(stype, section, value)`` tuple.
    For ``REL`` entries ``value`` is an offset inside ``section`` and becomes
    an address by adding that section's base; for ``ABS`` entries ``value``
    is used unchanged.
    """

    def __init__(self, symbols, bases):
        self._symbols = symbols
        self._bases = bases
        self._pass = None
        # Location of the statement currently being assembled; unknown until
        # set_from() has been called.
        self._from_section = None
        self._from_offset = None

    def set_pass(self, _pass):
        """Record which assembler pass is running (affects missing lookups)."""
        self._pass = _pass

    def set_bases(self, bases):
        """Replace the mapping of section name to section base address."""
        self._bases = bases

    def set_from(self, from_section, from_offset):
        """Remember the location that relative references are measured from."""
        self._from_section, self._from_offset = from_section, from_offset

    def get_from(self):
        """Return the ``(section, offset)`` pair last given to set_from()."""
        return self._from_section, self._from_offset

    def set_sym(self, symbol, stype, section, value):
        """Define ``symbol``; redefining it with the same entry is allowed.

        Raises Exception if the symbol already exists with a different entry.
        """
        entry = (stype, section, value)
        if symbol in self._symbols and entry != self._symbols[symbol]:
            raise Exception('redefining symbol %s with different value %r -> %r.' % (symbol, self._symbols[symbol], entry))
        self._symbols[symbol] = entry

    def has_sym(self, symbol):
        """Return True if ``symbol`` has been defined."""
        return symbol in self._symbols

    def get_sym(self, symbol):
        """Return the entry for ``symbol``.

        In pass 1 an unknown symbol yields a dummy entry, because forward
        references cannot be known yet; in any other pass KeyError propagates.
        """
        try:
            entry = self._symbols[symbol]
        except KeyError:
            if self._pass == 1:
                entry = (REL, TEXT, 0)  # for a dummy, this is good enough
            else:
                raise
        return entry

    def dump(self):
        """Print every symbol together with its entry."""
        for symbol, entry in self._symbols.items():
            print(symbol, entry)

    def to_abs_addr(self, section, offset):
        """Return ``offset`` within ``section`` as an absolute address.

        In pass 1 a section without a known base is treated as starting at 0;
        in any other pass KeyError propagates.
        """
        try:
            base = self._bases[section]
        except KeyError:
            if self._pass == 1:
                base = 0  # for a dummy this is good enough
            else:
                raise
        return base + offset

    def _entry_of(self, symbol):
        """Return the entry for a symbol name, or pass an entry tuple through."""
        if isinstance(symbol, str):
            return self.get_sym(symbol)
        if isinstance(symbol, tuple):
            return symbol
        raise TypeError('expected symbol name or entry tuple, got: %r' % (symbol,))

    def _addr_of(self, stype, section, value):
        """Turn one symbol entry into an absolute address / value."""
        if stype == REL:
            return self.to_abs_addr(section, value)
        if stype == ABS:
            return value
        raise TypeError(stype)

    def resolve_absolute(self, symbol):
        """Return the absolute value of ``symbol`` (a name or an entry tuple).

        Raises TypeError for an unsupported argument or symbol type.
        """
        return self._addr_of(*self._entry_of(symbol))

    def resolve_relative(self, symbol):
        """Return the address of ``symbol`` relative to the set_from() location.

        Raises TypeError for an unsupported argument or symbol type.
        """
        sym_addr = self._addr_of(*self._entry_of(symbol))
        from_addr = self.to_abs_addr(self._from_section, self._from_offset)
        return sym_addr - from_addr
90+
1091

1192
class Assembler:
1293

13-
def __init__(self):
14-
self.symbols = {}
94+
def __init__(self, symbols=None, bases=None):
    """Create an assembler, optionally sharing existing symbols and bases.

    ``symbols`` and ``bases`` are used as given when provided, including
    when they are empty, so a caller-supplied dict is the one that gets
    populated. A fresh dict is created only when the argument is None.
    """
    if symbols is None:
        symbols = {}
    if bases is None:
        bases = {}
    self.symbols = SymbolTable(symbols, bases)
    opcodes.symbols = self.symbols  # XXX dirty hack
97+
98+
def init(self, a_pass):
99+
self.a_pass = a_pass
100+
self.symbols.set_pass(a_pass)
15101
self.sections = dict(text=[], data=[])
16102
self.offsets = dict(text=0, data=0, bss=0)
17103
self.section = TEXT
@@ -78,10 +164,18 @@ def finalize_sections(self):
78164
if s is not BSS:
79165
self.sections[s].append(fill)
80166

167+
def compute_bases(self):
    """Return a dict mapping each section name to its base word address.

    Sections are placed back to back, starting at address 0.
    """
    layout = {}
    next_base = 0
    # lay out sections in this order:
    # TODO: more flexibility for custom sections
    for name in (TEXT, DATA, BSS):
        layout[name] = next_base
        # section offsets are byte counts, bases are 32bit word addresses
        next_base = next_base + self.offsets[name] // 4
    return layout
175+
81176
def dump(self):
82177
print("Symbols:")
83-
for label, section_offset in sorted(self.symbols.items()):
84-
print(label, section_offset)
178+
self.symbols.dump()
85179
print("%s section:" % TEXT)
86180
for t in self.sections[TEXT]:
87181
print("%08x" % int.from_bytes(t, 'little'))
@@ -108,19 +202,34 @@ def d_data(self):
108202
def d_bss(self):
109203
self.section = BSS
110204

205+
def fill(self, section, amount, fill_byte):
    """Reserve ``amount`` bytes in ``section``, filled with ``fill_byte``.

    ``fill_byte`` of None means zero fill. The section offset always
    advances; bytes are only stored for sections other than bss.

    Raises ValueError for an explicit fill byte in bss, and for any fill
    in the text section.
    """
    # Section names are plain strings, so compare by value rather than by
    # object identity.
    if fill_byte is not None and section == BSS:
        raise ValueError('fill in bss section not allowed')
    if section == TEXT:  # TODO: text section should be filled with NOPs
        raise ValueError('fill/skip/align in text section not supported')
    padding = int(fill_byte or 0).to_bytes(1, 'little') * amount
    self.offsets[section] += len(padding)
    if section != BSS:
        self.sections[section].append(padding)
214+
111215
def d_skip(self, amount, fill=None):
112-
s = self.section
113216
amount = int(amount)
114-
if fill is not None and s is BSS:
115-
raise ValueError('fill not allowed in section %s' % s)
116-
if s is BSS:
117-
self.append_section(amount)
118-
else:
119-
fill = int(fill or 0).to_bytes(1, 'little') * amount
120-
self.append_section(fill)
217+
self.fill(self.section, amount, fill)
121218

122219
d_space = d_skip
123220

221+
def d_align(self, align=4, fill=None):
    """Pad the current section up to the next multiple of ``align`` bytes.

    Nothing is emitted when the section offset is already aligned.

    Raises ValueError if ``align`` is not a positive integer.
    """
    align = int(align)
    if align < 1:
        # 0 would divide by zero below and a negative value would be
        # silently ignored, so reject both explicitly
        raise ValueError('invalid alignment: %d' % align)
    misalignment = self.offsets[self.section] % align
    if misalignment:
        self.fill(self.section, align - misalignment, fill)
228+
229+
def d_set(self, symbol, expr):
    """Define ``symbol`` as an absolute symbol with the integer value of ``expr``."""
    # TODO: support more than just integers
    self.symbols.set_sym(symbol, ABS, None, int(expr))
232+
124233
def append_data(self, wordlen, args):
125234
data = [int(arg).to_bytes(wordlen, 'little') for arg in args]
126235
self.append_section(b''.join(data))
@@ -134,12 +243,11 @@ def d_word(self, *args):
134243
def d_long(self, *args):
135244
self.append_data(4, args)
136245

137-
def assemble(self, lines):
246+
def assembler_pass(self, lines):
138247
for label, opcode, args in self.parse(lines):
248+
self.symbols.set_from(self.section, self.offsets[self.section] // 4)
139249
if label is not None:
140-
if label in self.symbols:
141-
raise Exception('label %s is already defined.' % label)
142-
self.symbols[label] = (self.section, self.offsets[self.section] // 4)
250+
self.symbols.set_sym(label, REL, *self.symbols.get_from())
143251
if opcode is not None:
144252
if opcode[0] == '.':
145253
# assembler directive
@@ -159,3 +267,10 @@ def assemble(self, lines):
159267
raise Exception('Unknown opcode or directive: %s' % opcode)
160268
self.finalize_sections()
161269

270+
def assemble(self, lines):
    """Assemble ``lines`` in two passes.

    Pass 1 is only to get the symbol table right. Once it is done the
    section bases are computed, and pass 2 is the real assembler pass with
    all symbols and bases known.
    """
    for pass_no in (1, 2):
        if pass_no == 2:
            # all section sizes are known now, so the bases can be fixed
            self.symbols.set_bases(self.compute_bases())
        self.init(pass_no)
        self.assembler_pass(lines)
276+

esp32_ulp/opcodes.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
from .soc import *
99

10+
# XXX dirty hack: use a global for the symbol table
11+
symbols = None
12+
1013
# Opcodes, Sub-Opcodes, Modes, ...
1114

1215
OPCODE_WR_REG = 1
@@ -250,7 +253,7 @@ def make_ins(layout):
250253

251254
# assembler opcode definitions
252255

253-
REG, IMM, COND = 0, 1, 2
256+
REG, IMM, COND, SYM = 0, 1, 2, 3
254257
ARG = namedtuple('ARG', ('type', 'value', 'raw'))
255258

256259

@@ -276,25 +279,44 @@ def arg_qualify(arg):
276279
return ARG(IMM, int(arg), arg)
277280
except ValueError:
278281
pass
279-
raise TypeError('arg_qualify: unsupported arg type: %s' % arg)
282+
entry = symbols.get_sym(arg)
283+
return ARG(SYM, entry, arg)
280284

281285

282286
def get_reg(arg):
283-
arg = arg_qualify(arg)
287+
if isinstance(arg, str):
288+
arg = arg_qualify(arg)
284289
if arg.type == REG:
285290
return arg.value
286291
raise TypeError('wanted: register, got: %s' % arg.raw)
287292

288293

289294
def get_imm(arg):
290-
arg = arg_qualify(arg)
295+
if isinstance(arg, str):
296+
arg = arg_qualify(arg)
297+
if arg.type == IMM:
298+
return arg.value
299+
if arg.type == SYM:
300+
return symbols.resolve_absolute(arg.value)
301+
raise TypeError('wanted: immediate, got: %s' % arg.raw)
302+
303+
304+
get_abs = get_imm
305+
306+
307+
def get_rel(arg):
    """Return ``arg`` as a relative offset.

    ``arg`` may be a raw string, which is qualified first, or an already
    qualified ARG. An immediate is returned as is; a symbol is resolved
    relative to the current location by the symbol table.

    Raises TypeError for any other argument type.
    """
    qualified = arg_qualify(arg) if isinstance(arg, str) else arg
    kind = qualified.type
    if kind == SYM:
        return symbols.resolve_relative(qualified.value)
    if kind == IMM:
        return qualified.value
    raise TypeError('wanted: immediate, got: %s' % qualified.raw)
294315

295316

296317
def get_cond(arg):
297-
arg = arg_qualify(arg)
318+
if isinstance(arg, str):
319+
arg = arg_qualify(arg)
298320
if arg.type == COND:
299321
return arg.value
300322
raise TypeError('wanted: condition, got: %s' % arg.raw)
@@ -429,10 +451,10 @@ def i_move(reg_dest, reg_imm_src):
429451
_alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
430452
_alu_reg.opcode = OPCODE_ALU
431453
return _alu_reg.all
432-
if src.type == IMM:
454+
if src.type == IMM or src.type == SYM:
433455
_alu_imm.dreg = dest
434456
_alu_imm.sreg = 0
435-
_alu_imm.imm = src.value
457+
_alu_imm.imm = get_abs(src)
436458
_alu_imm.unused = 0
437459
_alu_imm.sel = ALU_SEL_MOV
438460
_alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
@@ -457,10 +479,10 @@ def _alu3(reg_dest, reg_src1, reg_imm_src2, alu_sel):
457479
_alu_reg.sub_opcode = SUB_OPCODE_ALU_REG
458480
_alu_reg.opcode = OPCODE_ALU
459481
return _alu_reg.all
460-
if src2.type == IMM:
482+
if src2.type == IMM or src2.type == SYM:
461483
_alu_imm.dreg = dest
462484
_alu_imm.sreg = src1
463-
_alu_imm.imm = src2.value
485+
_alu_imm.imm = get_abs(src2)
464486
_alu_imm.unused = 0
465487
_alu_imm.sel = alu_sel
466488
_alu_imm.sub_opcode = SUB_OPCODE_ALU_IMM
@@ -546,9 +568,9 @@ def i_jump(target, condition='--'):
546568
jump_type = BX_JUMP_TYPE_DIRECT
547569
else:
548570
raise ValueError("invalid flags condition")
549-
if target.type == IMM:
571+
if target.type == IMM or target.type == SYM:
550572
_bx.dreg = 0
551-
_bx.addr = target.value
573+
_bx.addr = get_abs(target)
552574
_bx.unused = 0
553575
_bx.reg = 0
554576
_bx.type = jump_type
@@ -568,7 +590,7 @@ def i_jump(target, condition='--'):
568590

569591

570592
def i_jumpr(offset, threshold, condition):
571-
offset = get_imm(offset)
593+
offset = get_rel(offset)
572594
threshold = get_imm(threshold)
573595
condition = get_cond(condition)
574596
if condition == 'lt':
@@ -587,7 +609,7 @@ def i_jumpr(offset, threshold, condition):
587609

588610

589611
def i_jumps(offset, threshold, condition):
590-
offset = get_imm(offset)
612+
offset = get_rel(offset)
591613
threshold = get_imm(threshold)
592614
condition = get_cond(condition)
593615
if condition == 'lt':

tests/01_compat_tests.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ for src_file in $(ls -1 compat/*.S); do
2929
echo "py-esp32-ulp log:"
3030
cat $log_file
3131
echo "py-esp32-ulp output:"
32-
xxd -e $ulp_file
32+
xxd $ulp_file
3333
echo "binutils output:"
34-
xxd -e $bin_file
34+
xxd $bin_file
3535
exit 1
3636
else
3737
echo -e "\tBuild outputs match"

tests/assemble.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from esp32_ulp.assemble import Assembler, TEXT, DATA, BSS
1+
from esp32_ulp.assemble import Assembler, TEXT, DATA, BSS, REL, ABS
22

33
src = """\
44
@@ -31,9 +31,10 @@ def test_parse():
3131
def test_assemble():
3232
a = Assembler()
3333
a.assemble(src)
34-
assert {'start', 'end'} <= set(a.symbols)
35-
assert a.symbols['start'] == (TEXT, 0)
36-
assert a.symbols['end'] == (TEXT, 4)
34+
assert a.symbols.has_sym('start')
35+
assert a.symbols.has_sym('end')
36+
assert a.symbols.get_sym('start') == (REL, TEXT, 0)
37+
assert a.symbols.get_sym('end') == (REL, TEXT, 4)
3738
assert len(b''.join(a.sections[TEXT])) == 16 # 4 instructions * 4B
3839
assert len(a.sections[DATA]) == 0
3940
assert a.offsets[BSS] == 0

tests/compat/sections.S

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@
88

99
.space 4
1010
.space 8, 0xFF
11+
.space 1
12+
.align 4
13+
.space 3
1114

15+
# a section start will be automatically 32bit-aligned:
1216
.bss
1317

1418
.space 10
15-
19+
# a section end will be automatically 32bit-aligned

0 commit comments

Comments
 (0)