Skip to content

Commit d90a0e8

Browse files
committed
improve line parsing
So far, line parsing has made many assumptions about the format of lines. This commit makes line parsing more flexible and more tolerant of different formats. The change is in how labels are identified: any symbol that comes first on the line (ignoring leading whitespace) and is followed directly by a colon (:) is now identified as a label. Labels may contain all characters that are valid in symbol names, including the period (.), underscore (_) and dollar sign ($). This commit contributes to eventually being able to assemble the esp32ulp_all.s test from binutils-esp32ulp. It addresses this line: [esp32ulp_all.s:2](https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/testsuite/gas/esp32ulp/esp32/esp32ulp_all.s#L2) (no space between the colon and the next character) and also this line: [esp32ulp_globals.s:92](https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/testsuite/gas/esp32ulp/esp32/esp32ulp_globals.s#L92) (indented label).
1 parent c9ff24d commit d90a0e8

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

esp32_ulp/assemble.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,21 @@ def parse_line(self, line):
108108
"""
109109
if not line:
110110
return
111-
has_label = line[0] not in '\t .'
111+
has_label = ':' in line
112112
if has_label:
113-
label_line = line.split(None, 1)
113+
orig_line = line.strip()
114+
label_line = orig_line.split(':', 1)
114115
if len(label_line) == 2:
115116
label, line = label_line
116117
else: # 1
117118
label, line = label_line[0], None
118-
label = label.rstrip(':')
119+
120+
if label.strip('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_$.'): # if any chars remain
121+
# if label contains other chars than allowed, it's not a label
122+
label, line = None, orig_line
119123
else:
120124
label, line = None, line.lstrip()
121-
if line is None:
125+
if not line:
122126
opcode, args = None, ()
123127
else:
124128
opcode_args = line.split(None, 1)

tests/assemble.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,30 @@ def test_parse_line():
5353
assert a.parse_line(next(lines)) == (None, '.data', ()) # test left-aligned directive is not treated as label
5454

5555

56+
def test_parse_labels_correctly():
57+
"""
58+
description of what defines a label
59+
https://sourceware.org/binutils/docs/as/Statements.html
60+
https://sourceware.org/binutils/docs/as/Labels.html
61+
"""
62+
a = Assembler()
63+
assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
64+
assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
65+
assert a.parse_line('label:') == ('label', None, ())
66+
assert a.parse_line(' label:') == ('label', None, ())
67+
assert a.parse_line(' label: ') == ('label', None, ())
68+
assert a.parse_line('nop ') == (None, 'nop', ())
69+
assert a.parse_line('.set c, 1 ') == (None, '.set', ('c', '1',))
70+
assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',)) # no whitespace between label and colon
71+
assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',))
72+
assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',)) # colon in string
73+
assert a.parse_line('label::') == ('label', ':', ())
74+
assert a.parse_line('label: :') == ('label', ':', ())
75+
assert a.parse_line('a_label:') == ('a_label', None, ())
76+
assert a.parse_line('$label:') == ('$label', None, ())
77+
assert a.parse_line('.label:') == ('.label', None, ())
78+
79+
5680
def test_parse():
5781
a = Assembler()
5882
lines = remove_comments(src)
@@ -260,6 +284,7 @@ def test_support_multiple_statements_per_line():
260284

261285

262286
test_parse_line()
287+
test_parse_labels_correctly()
263288
test_parse()
264289
test_assemble()
265290
test_assemble_bss()

0 commit comments

Comments
 (0)