Skip to content

Commit e8fcd04

Browse files
committed
Make comment handling more resilient
'#'-signs can be embedded in strings
1 parent 0a86a05 commit e8fcd04

File tree

2 files changed

+55
-14
lines changed

2 files changed

+55
-14
lines changed

easybuild/easyblocks/generic/cargo.py

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,42 @@ def parse_toml_list(value: str) -> List[str]:
117117
return result
118118

119119

120+
def _clean_line(line: str, expected_end: Union[str, None]) -> str:
121+
"""Remove comments and trim line"""
122+
if '#' not in line:
123+
return line.strip()
124+
if expected_end is not None and expected_end[0] in ("'", '"'):
125+
try:
126+
idx = line.index(expected_end) + len(expected_end)
127+
except ValueError:
128+
return line.strip() # Ignore #-sign in multi-line string
129+
else:
130+
idx = 0
131+
in_str = False
132+
escaped = False
133+
while idx < len(line):
134+
c = line[idx]
135+
if in_str:
136+
if escaped:
137+
if c == '\\':
138+
escaped = False
139+
elif c == '"':
140+
in_str = False
141+
elif c == '\\':
142+
escaped = True
143+
elif c == '#':
144+
break
145+
elif c == '"':
146+
in_str = True
147+
elif c == "'":
148+
try:
149+
idx = line.index("'", idx + 1)
150+
except ValueError:
151+
idx = len(line)
152+
idx += 1
153+
return line[:idx].strip()
154+
155+
120156
def parse_toml(file: Path) -> Dict[str, str]:
121157
"""Minimally parse a TOML file into sections, keys and values
122158
@@ -129,9 +165,15 @@ def parse_toml(file: Path) -> Dict[str, str]:
129165
current_section = None
130166
content = read_file(file)
131167
num = raw_line = None
168+
start_end = {
169+
'[': ']',
170+
'{': '}',
171+
'"""': '"""',
172+
"'''": "'''",
173+
}
132174
try:
133175
for num, raw_line in enumerate(content.splitlines()):
134-
line: str = raw_line.split("#", 1)[0].strip()
176+
line: str = _clean_line(raw_line, expected_end)
135177
if not line:
136178
continue
137179
if pending_key is None and line.startswith("[") and line.endswith("]"):
@@ -142,14 +184,10 @@ def parse_toml(file: Path) -> Dict[str, str]:
142184
key, val = line.split("=", 1)
143185
pending_key = key.strip()
144186
pending_value = val.strip()
145-
if pending_value.startswith('['):
146-
expected_end = ']'
147-
elif pending_value.startswith('{'):
148-
expected_end = '}'
149-
elif pending_value.startswith('"""'):
150-
expected_end = '"""'
151-
elif pending_value.startswith("'''"):
152-
expected_end = "'''"
187+
for start, end in start_end.items():
188+
if pending_value.startswith(start):
189+
expected_end = end
190+
break
153191
else:
154192
expected_end = None
155193
else:

test/easyblocks/easyblock_specific.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -329,17 +329,19 @@ def test_cargo_toml_parsers(self):
329329
"""Test get_workspace_members in the Cargo easyblock"""
330330
crate_dir = Path(tempfile.mkdtemp())
331331
cargo_toml = crate_dir / 'Cargo.toml'
332-
333332
# Simple crate
334333
write_file(cargo_toml, textwrap.dedent("""
335334
[package]
336-
name = "my_crate"
335+
#[dummy]
336+
# ignore = this
337+
name = 'my_crate\\' #comment1' # comment2
337338
version = "0.1.0"
338-
edition = "2021"
339+
edition = "2021#2"
339340
description = '''
340341
Line 1
341342
Line 2
342343
'''
344+
documentation = "url?\\"#anchor"
343345
readme = \"""
344346
README.md
345347
\"""
@@ -352,10 +354,11 @@ def test_cargo_toml_parsers(self):
352354
parsed = cargo.parse_toml(cargo_toml)
353355
self.assertEqual(parsed, {
354356
'package': {
355-
'name': '"my_crate"',
357+
'name': "'my_crate\\'",
356358
'version': '"0.1.0"',
357-
'edition': '"2021"',
359+
'edition': '"2021#2"',
358360
'description': "'''\nLine 1\nLine 2\n'''",
361+
'documentation': '"url?\\"#anchor"',
359362
'readme': '"""\nREADME.md\n"""',
360363
'license': '"""MIT"""',
361364
'authors': "[\n'''Name d'Or Si''',\n]",

0 commit comments

Comments
 (0)