Skip to content

Commit 10ca81e

Browse files
Merge pull request #27 from ThomasWaldmann/preprocess
comment removal by state machine, fixes #17
2 parents 953811d + 8573fee commit 10ca81e

File tree

5 files changed

+200
-28
lines changed

5 files changed

+200
-28
lines changed

esp32_ulp/__main__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,19 @@
44
from .link import make_binary
55

66

7-
def src_to_binary(lines):
7+
def src_to_binary(src):
88
assembler = Assembler()
9-
assembler.assemble(lines)
9+
assembler.assemble(src)
1010
assembler.dump()
1111
text, data, bss_len = assembler.fetch()
1212
return make_binary(text, data, bss_len)
1313

1414

1515
def main(fn):
1616
with open(fn) as f:
17-
lines = f.readlines()
17+
src = f.read()
1818

19-
binary = src_to_binary(lines)
19+
binary = src_to_binary(src)
2020

2121
if fn.endswith('.s') or fn.endswith('.S'):
2222
fn = fn[:-2]

esp32_ulp/assemble.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
"""
44

55
from . import opcodes
6+
from .nocomment import remove_comments
67

78
TEXT, DATA, BSS = 'text', 'data', 'bss'
89

10+
911
class Assembler:
1012

1113
def __init__(self):
@@ -16,15 +18,13 @@ def __init__(self):
1618

1719
def parse_line(self, line):
1820
"""
19-
parse one line of assembler into label, opcode, args
21+
parse one line of assembler into label, opcode, args.
22+
comments already have been removed by pre-processing.
2023
2124
a line looks like (label, opcode, args, comment are all optional):
2225
23-
label: opcode arg1, arg2, ... # rest-of-line comment
26+
label: opcode arg1, arg2, ...
2427
"""
25-
line = line.split('#', 1)[0]
26-
line = line.split('//', 1)[0]
27-
line = line.rstrip()
2828
if not line:
2929
return
3030
has_label = line[0] not in '\t '
@@ -49,7 +49,8 @@ def parse_line(self, line):
4949
return label, opcode, args
5050

5151

52-
def parse(self, lines):
52+
def parse(self, text):
53+
lines = remove_comments(text)
5354
parsed = [self.parse_line(line) for line in lines]
5455
return [p for p in parsed if p is not None]
5556

esp32_ulp/nocomment.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
def remove_comments(s):
    """
    Remove comments of these styles:

    CHASH:       # comment python style, up to: EOL
    CSLASHSLASH: // comment C style, up to: EOL
    CSLASHSTAR:  /* comment C style (single/multi line), up to: */

    Strings can be like 'strings' or "strings".
    Any comment-starting chars within strings are not considered.
    Escaping of (string-end) chars via backslash in strings is considered.

    Also, leading and trailing whitespace is removed (after comment removal).
    Indented lines are re-indented afterwards with a single tab char.

    Line numbers stay as in input file because empty lines are kept. This
    also holds for a last line that has no trailing newline and consists
    only of a comment: it is returned as an empty line.

    s: string with comments (can include newlines)
    returns: list of text lines
    """
    # note: micropython's ure module was not capable enough to process this:
    # missing methods, re modes, recursion limit exceeded, ...
    # simpler hacks also didn't seem powerful enough to address all the
    # corner cases of CSLASHSTAR vs. *STR, so this state machine came to life:
    SRC, CHASH, CSLASHSLASH, CSLASHSTAR, DSTR, SSTR = range(6)  # states

    line = []  # collect chars of one line
    lines = []  # collect result lines
    pending = False  # True if input was consumed since the last finished line

    def finish_line():
        # assemble a line from characters, try to get rid of trailing and
        # most of leading whitespace (keep/put one tab for indented lines).
        nonlocal line, pending
        text = ''.join(line)
        is_indented = text.startswith(' ') or text.startswith('\t')
        text = text.strip()
        if text and is_indented:
            text = '\t' + text
        lines.append(text)
        line = []
        pending = False

    state = SRC
    i = 0
    length = len(s)
    while i < length:
        pending = True
        c = s[i]
        # one char of look-ahead; '\0' is only a sentinel for "no next char"
        # and must never be copied into the output.
        cn = s[i + 1] if i + 1 < length else '\0'
        if state == SRC:
            if c == '#':  # starting to-EOL comment
                state = CHASH
                i += 1
            elif c == '/' and cn == '/':  # starting to-EOL comment
                state = CSLASHSLASH
                i += 2
            elif c == '/' and cn == '*':  # starting a /* comment
                state = CSLASHSTAR
                i += 2
            elif c == '\n':
                i += 1
                finish_line()
            else:
                # ordinary source char; quotes additionally open a string
                if c == '"':
                    state = DSTR
                elif c == "'":
                    state = SSTR
                i += 1
                line.append(c)
        elif state == CHASH or state == CSLASHSLASH:
            i += 1
            if c == '\n':  # comment runs until EOL
                state = SRC
                finish_line()
        elif state == CSLASHSTAR:
            if c == '*' and cn == '/':  # ending a comment */
                state = SRC
                i += 2
            else:
                i += 1
                if c == '\n':  # keep line structure inside the comment
                    finish_line()
        elif state == DSTR or state == SSTR:
            i += 1
            line.append(c)
            if (state == DSTR and c == '"') or (state == SSTR and c == "'"):
                state = SRC  # string end
            elif c == '\\' and i < length:  # escaping backslash
                # copy the escaped char verbatim, do not interpret it.
                # if the backslash is the very last input char, there is
                # nothing to copy.
                line.append(s[i])
                i += 1
        else:
            raise Exception("state: %d c: %s cn: %s" % (state, c, cn))
    if pending:
        # no final \n triggered processing of the last line yet, do it now
        # (also for a last line that only contained a comment).
        finish_line()
    return lines
108+
109+
110+
if __name__ == '__main__':
    # command line usage: strip comments from the file given as first
    # argument and write the result next to it as <file>.nocomments
    import sys
    src_path = sys.argv[1]
    with open(src_path, "r") as src_file:
        stripped = remove_comments(src_file.read())
    with open(src_path + ".nocomments", "w") as dst_file:
        # every result line is written back terminated by a newline
        dst_file.write(''.join(entry + '\n' for entry in stripped))
119+

tests/assemble.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
from esp32_ulp.assemble import Assembler, TEXT, DATA, BSS
22

33
src = """\
4-
# line 1
54
6-
start: wait 42 # line 3
7-
8-
# line 5
5+
start: wait 42
96
ld r0, r1, 0
10-
st r0, r1,0 # line 7
7+
st r0, r1,0
118
halt
12-
end: // C style comment
9+
end:
1310
"""
1411

1512

@@ -18,27 +15,22 @@ def test_parse_line():
1815
lines = src.splitlines()
1916
# note: line number = index + 1
2017
assert a.parse_line(lines[0]) == None
21-
assert a.parse_line(lines[1]) == None
22-
assert a.parse_line(lines[2]) == ('start', 'wait', ('42', ))
23-
assert a.parse_line(lines[3]) == None
24-
assert a.parse_line(lines[4]) == None
25-
assert a.parse_line(lines[5]) == (None, 'ld', ('r0', 'r1', '0', ))
26-
assert a.parse_line(lines[6]) == (None, 'st', ('r0', 'r1', '0', ))
27-
assert a.parse_line(lines[7]) == (None, 'halt', ())
28-
assert a.parse_line(lines[8]) == ('end', None, ())
18+
assert a.parse_line(lines[1]) == ('start', 'wait', ('42', ))
19+
assert a.parse_line(lines[2]) == (None, 'ld', ('r0', 'r1', '0', ))
20+
assert a.parse_line(lines[3]) == (None, 'st', ('r0', 'r1', '0', ))
21+
assert a.parse_line(lines[4]) == (None, 'halt', ())
22+
assert a.parse_line(lines[5]) == ('end', None, ())
2923

3024

3125
def test_parse():
3226
a = Assembler()
33-
lines = src.splitlines()
34-
result = a.parse(lines)
27+
result = a.parse(src)
3528
assert None not in result
3629

3730

3831
def test_assemble():
3932
a = Assembler()
40-
lines = src.splitlines()
41-
a.assemble(lines)
33+
a.assemble(src)
4234
assert {'start', 'end'} <= set(a.symbols)
4335
assert a.symbols['start'] == (TEXT, 0)
4436
assert a.symbols['end'] == (TEXT, 4)

tests/nocomment.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from esp32_ulp.nocomment import remove_comments
2+
3+
ORIG = """\
4+
/*
5+
* HEADER
6+
*/
7+
8+
# single, full line comment
9+
10+
label: // another rest-of-line comment
11+
12+
nop /* partial line */
13+
14+
.string "try confusing /* with a comment start"
15+
.string "should be there 1" /* another comment */
16+
.string 'try confusing */ with a comment end'
17+
18+
.string "more confusing \\" /* should be there 2"
19+
/* comment */
20+
.string 'more confusing \\' */'
21+
22+
/***** FOOTER *****/
23+
"""
24+
25+
EXPECTED = """\
26+
27+
28+
29+
30+
31+
32+
label:
33+
34+
nop
35+
36+
.string "try confusing /* with a comment start"
37+
.string "should be there 1"
38+
.string 'try confusing */ with a comment end'
39+
40+
.string "more confusing \\" /* should be there 2"
41+
42+
.string 'more confusing \\' */'
43+
44+
45+
"""
46+
47+
48+
def test_remove_comments():
    """
    Check remove_comments(ORIG) against EXPECTED.

    First the line counts of input, expectation and result must agree,
    then the resulting lines must equal the expected lines.
    """
    expected = EXPECTED.splitlines()
    got = remove_comments(ORIG)
    counts = (len(ORIG.splitlines()), len(expected), len(got))
    assert counts[0] == counts[1] == counts[2], \
        "line count differs %d %d %d" % counts
    assert expected == got, "texts differ"
58+
59+
60+
test_remove_comments()

0 commit comments

Comments
 (0)