Skip to content

Commit feb42dc

Browse files
committed
add support for evaluating expressions
This change allows immediate values to be calculated from an expression, such as 1+1, including expressions that reference symbols, such as "100 << const" (where const was defined with the .set directive). Expressions are also supported in .set directives. Expressions are evaluated using the built-in eval(). To prevent misuse or malicious code execution, expressions are validated before they are evaluated. By the time eval() is called, all symbols should already have been resolved to their values, so only numeric characters, arithmetic and bitwise operators, round brackets and whitespace need to be allowed. The character 'x' and the characters 'abcdef' are also accepted to allow for hex numbers such as 0x123abc; however, they are only allowed in sequences starting with 0x. If any other character is encountered, the expression is deemed invalid and an exception is raised.
1 parent 54b117e commit feb42dc

File tree

7 files changed

+219
-5
lines changed

7 files changed

+219
-5
lines changed

esp32_ulp/assemble.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def d_align(self, align=4, fill=None):
240240
self.fill(self.section, amount, fill)
241241

242242
def d_set(self, symbol, expr):
243-
value = int(expr) # TODO: support more than just integers
243+
value = int(opcodes.eval_arg(expr)) # TODO: support more than just integers
244244
self.symbols.set_sym(symbol, ABS, None, value)
245245

246246
def d_global(self, symbol):

esp32_ulp/opcodes.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN
77

88
from .soc import *
9+
from .util import split_tokens, validate_expression
910

1011
# XXX dirty hack: use a global for the symbol table
1112
symbols = None
@@ -267,6 +268,20 @@ def make_ins(layout):
267268
ARG = namedtuple('ARG', ('type', 'value', 'raw'))
268269

269270

271+
def eval_arg(arg):
    """
    Evaluate an assembler expression and return the resulting value.

    Every token of arg that names a known symbol is replaced by that
    symbol's value. The substituted text must then pass
    validate_expression() before it is handed to eval().

    Raises ValueError if the substituted expression is rejected by
    validate_expression().
    """
    parts = []
    for token in split_tokens(arg):
        if symbols.has_sym(token):
            _, _, sym_value = symbols.get_sym(token)
            # parenthesise the value so a negative symbol keeps its meaning
            # next to a tighter-binding operator: with sym = -2, 'sym ** 2'
            # has to evaluate as '(-2) ** 2' and not as '-2 ** 2'
            parts.append('(' + str(sym_value) + ')')
        else:
            parts.append(token)
    expression = "".join(parts)
    if not validate_expression(expression):
        raise ValueError('Unsupported expression: %s' % expression)
    # NOTE(security): this runs eval() on text taken from assembler source.
    # validate_expression() is the guard against code execution; eval() is
    # additionally given a namespace of its own rather than this module's
    # globals, so a name that slips through cannot resolve to module state.
    return eval(expression, {'__builtins__': {}})
283+
284+
270285
def arg_qualify(arg):
271286
"""
272287
look at arg and qualify its type:
@@ -289,8 +304,12 @@ def arg_qualify(arg):
289304
return ARG(IMM, int(arg), arg)
290305
except ValueError:
291306
pass
292-
entry = symbols.get_sym(arg)
293-
return ARG(SYM, entry, arg)
307+
try:
308+
entry = symbols.get_sym(arg)
309+
return ARG(SYM, entry, arg)
310+
except KeyError:
311+
pass
312+
return ARG(IMM, int(eval_arg(arg)), arg)
294313

295314

296315
def get_reg(arg):

esp32_ulp/util.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,68 @@
22

33
import gc
44

5+
NORMAL, WHITESPACE = 0, 1
6+
57

68
def garbage_collect(msg, verbose=DEBUG):
    """
    Run a garbage collection and, when verbose is set, print the amount of
    free memory before and after it, prefixed with msg.
    """
    before = gc.mem_free()
    gc.collect()
    after = gc.mem_free()
    if not verbose:
        return
    print("%s: %d --gc--> %d bytes free" % (msg, before, after))
14+
15+
16+
def split_tokens(line):
    """
    Split line into a list of tokens without dropping any character.

    Consecutive word characters (letters, digits, underscore) form one
    token, consecutive spaces/tabs form one token, and every other
    character is a token of its own.
    """
    def classify(ch):
        # 'word' and 'blank' characters are merged into runs; None marks a
        # character that always stands alone
        if ('a' <= ch <= 'z') or ('A' <= ch <= 'Z') or ('0' <= ch <= '9') or ch == '_':
            return 'word'
        if ch in (' ', '\t'):
            return 'blank'
        return None

    tokens = []
    run_kind = None  # class of the token currently being extended, if any
    for ch in line:
        kind = classify(ch)
        if kind is not None and kind == run_kind:
            tokens[-1] += ch
        else:
            tokens.append(ch)
        run_kind = kind
    return tokens
45+
46+
47+
def validate_expression(param):
    """
    Check that param consists only of numeric literals, operators, round
    brackets and whitespace, so that it can be handed to eval().

    Accepted tokens (as produced by split_tokens):
    - runs of spaces/tabs
    - one of the single characters + - * / % ( ) < > & | ~
    - decimal literals made up of the digits 0-9
    - hex literals: '0x' followed by at least one of 0-9 / a-f

    Only the tokens are checked, not the syntax of the expression as a
    whole: '(((1+1' is accepted.

    Returns True if every token is acceptable, False otherwise.
    """
    for token in split_tokens(param):
        first = token[0]
        if first in ' \t' or first in '+-*/%()<>&|~':
            # whitespace run or a single operator/bracket character
            continue

        # anything else has to be a complete numeric literal; checking the
        # token as a whole keeps identifier-like tokens such as 'x', 'x0a'
        # or '10a' away from eval()
        if token[:2] == '0x':
            digits, allowed = token[2:], '0123456789abcdef'
        else:
            digits, allowed = token, '0123456789'
        if not digits:
            return False  # a bare '0x' is not a number
        for c in digits:
            if c not in allowed:
                return False
    return True

tests/00_unit_tests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
set -e
66

7-
for file in opcodes assemble link ; do
7+
for file in opcodes assemble link util; do
88
echo testing $file...
99
micropython $file.py
1010
done

tests/assemble.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,28 @@ def test_assemble_uppercase_opcode():
135135
assert not raised
136136

137137

138+
def test_assemble_evalulate_expressions():
# Assembles a snippet that uses expressions both in .set directives and in
# instruction operands, then checks the resulting symbol table entries.
# Only the symbol table is asserted; the encoded instructions are not inspected.
139+
src_w_expr = """\
140+
.set shft, 2
141+
.set loops, (1 << shft)
142+
143+
entry:
144+
move r0, 1+1
145+
move r1, loops
146+
move r2, (shft + 10) * 2
147+
move r3, entry << 2
148+
"""
149+
a = Assembler()
150+
a.assemble(src_w_expr)
151+
152+
assert a.symbols.has_sym('shft')
153+
assert a.symbols.has_sym('loops')
154+
assert a.symbols.has_sym('entry')
155+
assert a.symbols.get_sym('shft') == (ABS, None, 2)
156+
assert a.symbols.get_sym('loops') == (ABS, None, 4)
157+
assert a.symbols.get_sym('entry') == (REL, TEXT, 0)
158+
159+
138160
def test_symbols():
139161
st = SymbolTable({}, {}, {})
140162
for entry in [
@@ -195,4 +217,5 @@ def test_symbols():
195217
test_assemble_bss_with_value()
196218
test_assemble_global()
197219
test_assemble_uppercase_opcode()
220+
test_assemble_evalulate_expressions()
198221
test_symbols()

tests/opcodes.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
22
from esp32_ulp.opcodes import make_ins, make_ins_struct_def
3-
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, ARG, REG, IMM, COND
3+
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
4+
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
5+
import esp32_ulp.opcodes as opcodes
46

57
OPCODE_DELAY = 4
68
LAYOUT_DELAY = """
@@ -43,6 +45,19 @@ def test_arg_qualify():
4345
assert arg_qualify('Eq') == ARG(COND, 'eq', 'Eq')
4446
assert arg_qualify('EQ') == ARG(COND, 'eq', 'EQ')
4547

48+
# for the next tests, ensure the opcodes module has a SymbolTable
49+
opcodes.symbols = SymbolTable({}, {}, {})
50+
opcodes.symbols.set_sym('const', ABS, None, 42) # constant as defined by .set
51+
opcodes.symbols.set_sym('entry', REL, TEXT, 4) # label pointing to code
52+
53+
assert arg_qualify('1+1') == ARG(IMM, 2, '1+1')
54+
assert arg_qualify('const >> 1') == ARG(IMM, 21, 'const >> 1')
55+
assert arg_qualify('entry') == ARG(SYM, (REL, TEXT, 4), 'entry') # symbols should not (yet) be evaluated
56+
assert arg_qualify('entry + const') == ARG(IMM, 46, 'entry + const')
57+
58+
# clean up
59+
opcodes.symbols = None
60+
4661

4762
def test_get_reg():
4863
assert get_reg('r0') == 0
@@ -57,9 +72,46 @@ def test_get_cond():
5772
assert get_cond('Eq') == 'eq'
5873

5974

75+
def test_eval_arg():
    """
    Check eval_arg() against arithmetic and bitwise expressions that
    reference symbols, and check that unsupported input raises ValueError.
    """
    opcodes.symbols = SymbolTable({}, {}, {})
    try:
        opcodes.symbols.set_sym('const', ABS, None, 42)  # constant
        opcodes.symbols.set_sym('raise', ABS, None, 99)  # constant using a python keyword as name (is allowed)

        assert eval_arg('1+1') == 2
        assert eval_arg('1+const') == 43
        assert eval_arg('raise*2/3') == 66
        assert eval_arg('raise-const') == 57
        assert eval_arg('(raise-const)*2') == 114
        assert eval_arg('const % 5') == 2
        assert eval_arg('const + 0x19af') == 0x19af + 42
        assert eval_arg('const & ~2') == 40
        assert eval_arg('const << 3') == 336
        assert eval_arg('const >> 1') == 21
        assert eval_arg('(const|4)&0xf') == 0xe

        assert_raises(ValueError, eval_arg, 'evil()')
        assert_raises(ValueError, eval_arg, 'def cafe()')
        assert_raises(ValueError, eval_arg, '1 ^ 2')
        assert_raises(ValueError, eval_arg, '!100')
    finally:
        # clean up even when an assertion fails, so the module-global symbol
        # table is never left pointing at this test's data
        opcodes.symbols = None
99+
100+
101+
def assert_raises(exception, func, *args):
    """
    Assert that calling func(*args) raises exception.

    Fails with an AssertionError if the call returns normally; any other
    exception raised by func propagates unchanged.
    """
    try:
        func(*args)
    except exception:
        return
    assert False
109+
110+
60111
test_make_ins_struct_def()
61112
test_make_ins()
62113
test_arg_qualify()
63114
test_get_reg()
64115
test_get_imm()
65116
test_get_cond()
117+
test_eval_arg()

tests/util.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from esp32_ulp.util import split_tokens, validate_expression
2+
3+
tests = []
4+
5+
6+
def test(param):
    """
    the @test decorator

    Registers param in the module-level tests list and returns it
    unchanged, so the decorated name stays bound to the function instead
    of being rebound to None.
    """
    tests.append(param)
    return param
11+
12+
13+
@test
def test_split_tokens():
    """
    Check how split_tokens() groups word characters and whitespace and
    how it isolates every other character.
    """
    assert split_tokens("") == []
    assert split_tokens("t") == ['t']
    assert split_tokens("test") == ['test']
    assert split_tokens("t t") == ['t', ' ', 't']
    assert split_tokens("t,t") == ['t', ',', 't']
    assert split_tokens("test(arg)") == ['test', '(', 'arg', ')']
    assert split_tokens("test(arg,arg2)") == ['test', '(', 'arg', ',', 'arg2', ')']
    assert split_tokens(" test( arg, arg2)") == [' ', 'test', '(', ' ', 'arg', ',', ' ', 'arg2', ')']
    assert split_tokens(" test( arg ) ") == [' ', 'test', '(', ' ', 'arg', ' ', ')', ' ']
    assert split_tokens("\t test \t ") == ['\t ', 'test', " \t "]
    assert split_tokens("test\nrow2") == ['test', "\n", "row2"]

    # split_token does not support comments. should generally only be used after comments are already stripped
    assert split_tokens("test(arg /*comment*/)") == ['test', '(', 'arg', ' ', '/', '*', 'comment', '*', '/', ')']
    assert split_tokens("#test") == ['#', 'test']
31+
32+
33+
@test
def test_validate_expression():
    """
    Check which expressions validate_expression() accepts and rejects.
    """
    accepted = (
        '',
        '1',
        '1+1',
        '(1+1)',
        '(1+1)*2',
        '(1 + 1)',
        '10 % 2',
        '0x100 << 2',
        '0x100 & ~2',
        '0xabcdef',
        '0x123def',
        '2*3+4/5&6|7',
        '(((((1+1) * 2',  # valid characters, even if expression is not valid
    )
    for expression in accepted:
        assert validate_expression(expression) is True

    rejected = (
        ':',
        '_',
        '=',
        '.',
        '!',
        '123 ^ 4',  # operator not supported for now
        'evil()',
        'def cafe()',  # valid hex digits, but potentially dangerous code
    )
    for expression in rejected:
        assert validate_expression(expression) is False
57+
58+
59+
if __name__ == '__main__':
60+
# run all methods marked with @test
61+
for t in tests:
62+
t()

0 commit comments

Comments
 (0)