Skip to content

Commit cc753bc

Browse files
authored
Merge pull request #3095 from boegel/fix_dump_comments
fix various issues in extracting comments from original easyconfig file and including them again in dumped easyconfig
2 parents 15d3fcf + da3532b commit cc753bc

File tree

2 files changed

+422
-66
lines changed

2 files changed

+422
-66
lines changed

easybuild/framework/easyconfig/format/one.py

Lines changed: 166 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
:author: Kenneth Hoste (Ghent University)
3232
"""
3333
import os
34+
import pprint
3435
import re
3536
import tempfile
3637

@@ -150,12 +151,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
150151

151152
elif outer:
152153
# only reformat outer (iterable) values for (too) long lines (or for select parameters)
153-
if isinstance(param_val, (list, tuple, dict)) and ((len(param_val) > 1 and line_too_long) or forced):
154+
if isinstance(param_val, (list, tuple, dict)) and ((len(param_val) > 1 or line_too_long) or forced):
154155

155-
item_tmpl = INDENT_4SPACES + '%(item)s,%(comment)s\n'
156+
item_tmpl = INDENT_4SPACES + '%(item)s,%(inline_comment)s\n'
157+
158+
start_char, end_char = param_strval[0], param_strval[-1]
156159

157160
# start with opening character: [, (, {
158-
res = '%s\n' % param_strval[0]
161+
res = '%s\n' % start_char
159162

160163
# add items one-by-one, special care for dict values (order of keys, different format for elements)
161164
if isinstance(param_val, dict):
@@ -166,18 +169,32 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
166169
else:
167170
raise EasyBuildError("Missing mandatory key '%s' in %s.", item_key, param_name)
168171

169-
comment = self._get_item_comments(param_name, item_val).get(str(item_val), '')
172+
item_comments = self._get_item_comments(param_name, item_val)
173+
174+
inline_comment = item_comments.get('inline', '')
175+
item_tmpl_dict = {'inline_comment': inline_comment}
176+
177+
for comment in item_comments.get('above', []):
178+
res += INDENT_4SPACES + comment + '\n'
179+
170180
key_pref = quote_py_str(item_key) + ': '
171-
addlen = addlen + len(INDENT_4SPACES) + len(key_pref) + len(comment)
181+
addlen = addlen + len(INDENT_4SPACES) + len(key_pref) + len(inline_comment)
172182
formatted_item_val = self._reformat_line(param_name, item_val, addlen=addlen)
173-
res += item_tmpl % {
174-
'comment': comment,
175-
'item': key_pref + formatted_item_val,
176-
}
183+
item_tmpl_dict['item'] = key_pref + formatted_item_val
184+
185+
res += item_tmpl % item_tmpl_dict
186+
177187
else: # list, tuple
178188
for item in param_val:
179-
comment = self._get_item_comments(param_name, item).get(str(item), '')
180-
addlen = addlen + len(INDENT_4SPACES) + len(comment)
189+
item_comments = self._get_item_comments(param_name, item)
190+
191+
inline_comment = item_comments.get('inline', '')
192+
item_tmpl_dict = {'inline_comment': inline_comment}
193+
194+
for comment in item_comments.get('above', []):
195+
res += INDENT_4SPACES + comment + '\n'
196+
197+
addlen = addlen + len(INDENT_4SPACES) + len(inline_comment)
181198
# the tuples are really strings here that are constructed from the dependency dicts
182199
# so for a plain list of builddependencies param_val is a list of strings here;
183200
# and for iterated builddependencies it is a list of lists of strings
@@ -188,14 +205,20 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
188205
for subitem in item]) + ']'
189206
else:
190207
itemstr = self._reformat_line(param_name, item, addlen=addlen)
208+
item_tmpl_dict['item'] = itemstr
209+
210+
res += item_tmpl % item_tmpl_dict
191211

192-
res += item_tmpl % {
193-
'comment': comment,
194-
'item': itemstr
195-
}
212+
# take into account possible closing comments
213+
# see https://github.com/easybuilders/easybuild-framework/issues/3082
214+
end_comments = self._get_item_comments(param_name, end_char)
215+
for comment in end_comments.get('above', []):
216+
res += INDENT_4SPACES + comment + '\n'
196217

197-
# end with closing character: ], ), }
198-
res += param_strval[-1]
218+
# end with closing character (']', ')', '}'), incl. possible inline comment
219+
res += end_char
220+
if 'inline' in end_comments:
221+
res += end_comments['inline']
199222

200223
else:
201224
# dependencies are already dumped as strings, so they do not need to be quoted again
@@ -207,9 +230,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
207230
def _get_item_comments(self, key, val):
208231
"""Get per-item comments for specified parameter name/value."""
209232
item_comments = {}
210-
for comment_key, comment_val in self.comments['iter'].get(key, {}).items():
233+
234+
for comment_key, comment_val in self.comments['iterabove'].get(key, {}).items():
235+
if str(val) in comment_key:
236+
item_comments['above'] = comment_val
237+
238+
for comment_key, comment_val in self.comments['iterinline'].get(key, {}).items():
211239
if str(val) in comment_key:
212-
item_comments[str(val)] = comment_val
240+
item_comments['inline'] = comment_val
213241

214242
return item_comments
215243

@@ -312,64 +340,138 @@ def extract_comments(self, rawtxt):
312340
Inline comments on items of iterable values are also extracted.
313341
"""
314342
self.comments = {
315-
'above': {}, # comments for a particular parameter definition
343+
'above': {}, # comments above a parameter definition
316344
'header': [], # header comment lines
317345
'inline': {}, # inline comments
318-
'iter': {}, # (inline) comments on elements of iterable values
319-
'tail': [],
346+
'iterabove': {}, # comment above elements of iterable values
347+
'iterinline': {}, # inline comments on elements of iterable values
348+
'tail': [], # comment at the end of the easyconfig file
320349
}
321350

322-
rawlines = rawtxt.split('\n')
351+
parsed_ec = self.get_config_dict()
323352

324-
# extract header first
325-
while rawlines and rawlines[0].startswith('#'):
326-
self.comments['header'].append(rawlines.pop(0))
353+
comment_regex = re.compile(r'^\s*#')
354+
param_def_regex = re.compile(r'^([a-z_0-9]+)\s*=')
355+
whitespace_regex = re.compile(r'^\s*$')
327356

328-
parsed_ec = self.get_config_dict()
357+
def clean_part(part):
358+
"""Helper function to strip off trailing whitespace + trailing quotes."""
359+
return part.rstrip().rstrip("'").rstrip('"')
360+
361+
def split_on_comment_hash(line, param_key):
362+
"""Helper function to split line on first (actual) comment character '#'."""
363+
364+
# string representation of easyconfig parameter value,
365+
# used to check if supposed comment isn't actual part of the parameter value
366+
# (and thus not actually a comment at all)
367+
param_strval = str(parsed_ec.get(param_key))
368+
369+
parts = line.split('#')
370+
371+
# first part (before first #) is definitely not part of comment
372+
before_comment = parts.pop(0)
329373

374+
# strip out parts that look like a comment but are actually part of a parameter value
375+
while parts and ('#' + clean_part(parts[0])) in param_strval:
376+
before_comment += '#' + parts.pop(0)
377+
378+
comment = '#'.join(parts)
379+
380+
return before_comment, comment.strip()
381+
382+
def grab_more_comment_lines(lines, param_key):
383+
"""Grab more comment lines."""
384+
385+
comment_lines = []
386+
387+
while lines and (comment_regex.match(lines[0]) or whitespace_regex.match(lines[0])):
388+
line = lines.pop(0)
389+
_, actual_comment = split_on_comment_hash(line, param_key)
390+
# prefix comment with '#' unless line was empty
391+
if line.strip():
392+
actual_comment = '# ' + actual_comment
393+
comment_lines.append(actual_comment.strip())
394+
395+
return comment_lines
396+
397+
rawlines = rawtxt.split('\n')
398+
399+
# extract header first (include empty lines too)
400+
self.comments['header'] = grab_more_comment_lines(rawlines, None)
401+
402+
last_param_key = None
330403
while rawlines:
331404
rawline = rawlines.pop(0)
405+
406+
# keep track of last parameter definition we have seen,
407+
# current line may be (the start of) a parameter definition
408+
res = param_def_regex.match(rawline)
409+
if res:
410+
key = res.group(1)
411+
if key in parsed_ec:
412+
last_param_key = key
413+
414+
if last_param_key:
415+
before_comment, inline_comment = split_on_comment_hash(rawline, last_param_key)
416+
417+
# short-circuit to next line in case there are no actual comments on this (non-empty) line
418+
if before_comment and not inline_comment:
419+
continue
420+
421+
# lines that start with a hash indicate (start of a block of) comment line(s)
332422
if rawline.startswith('#'):
333-
comment = []
334-
# comment could be multi-line
335-
while rawline is not None and (rawline.startswith('#') or not rawline):
336-
# drop empty lines (that don't even include a #)
337-
if rawline:
338-
comment.append(rawline)
339-
# grab next line (if more lines are left)
340-
if rawlines:
341-
rawline = rawlines.pop(0)
423+
comment = [rawline] + grab_more_comment_lines(rawlines, last_param_key)
424+
425+
if rawlines:
426+
# try to pin comment to parameter definition below it
427+
# don't consume the line yet though, it may also include inline comments...
428+
res = param_def_regex.match(rawlines[0])
429+
if res:
430+
last_param_key = res.group(1)
431+
self.comments['above'][last_param_key] = comment
342432
else:
343-
rawline = None
344-
345-
if rawline is None:
346-
self.comments['tail'] = comment
433+
# if the comment is not above a parameter definition,
434+
# then it must be a comment for an item of an iterable parameter value
435+
before_comment, _ = split_on_comment_hash(rawlines[0], last_param_key)
436+
comment_key = before_comment.rstrip()
437+
self.comments['iterabove'].setdefault(last_param_key, {})[comment_key] = comment
347438
else:
348-
key = rawline.split('=', 1)[0].strip()
349-
self.comments['above'][key] = comment
350-
351-
elif '#' in rawline: # inline comment
352-
comment_key, comment_val = None, None
353-
comment = rawline.rsplit('#', 1)[1].strip()
354-
# check whether this line is parameter definition;
355-
# if not, assume it's a continuation of a multi-line value
356-
if re.match(r'^[a-z_]+\s*=', rawline):
357-
comment_key = rawline.split('=', 1)[0].strip()
439+
# if there are no more lines, the comment (block) is at the tail
440+
self.comments['tail'] = comment
441+
442+
elif '#' in rawline:
443+
# if there's a hash character elsewhere in the line (not at the start),
444+
# there are a couple of possibilities:
445+
# - inline comment for a parameter definition (at the end of a non-empty line)
446+
# - indented comment for an item value of an iterable easyconfig parameter (list, dict, ...)
447+
# - inline comment for an item value of an iterable easyconfig parameter
448+
449+
before_comment, comment = split_on_comment_hash(rawline, last_param_key)
450+
comment = ('# ' + comment).rstrip()
451+
452+
# first check whether current line is an easyconfig parameter definition
453+
# if so, the comment is an inline comment
454+
if param_def_regex.match(before_comment):
455+
self.comments['inline'][last_param_key] = ' ' + comment
456+
457+
# if there's only whitespace before the comment,
458+
# then we have an indented comment, and we need to figure out for what exactly
459+
elif whitespace_regex.match(before_comment):
460+
# first consume possible additional comment lines with same indentation
461+
comment = [comment] + grab_more_comment_lines(rawlines, last_param_key)
462+
463+
before_comment, inline_comment = split_on_comment_hash(rawlines.pop(0), last_param_key)
464+
comment_key = before_comment.rstrip()
465+
self.comments['iterabove'].setdefault(last_param_key, {})[comment_key] = comment
466+
if inline_comment:
467+
inline_comment = (' # ' + inline_comment).rstrip()
468+
self.comments['iterinline'].setdefault(last_param_key, {})[comment_key] = inline_comment
358469
else:
359-
# determine parameter value where the item value on this line is a part of
360-
for key, val in parsed_ec.items():
361-
item_val = re.sub(r',$', r'', rawline.rsplit('#', 1)[0].strip())
362-
if not isinstance(val, string_type) and item_val in str(val):
363-
comment_key, comment_val = key, item_val
364-
break
365-
366-
# check if hash actually indicated a comment; or is part of the value
367-
if comment_key in parsed_ec:
368-
if comment.replace("'", '').replace('"', '') not in str(parsed_ec[comment_key]):
369-
if comment_val:
370-
self.comments['iter'].setdefault(comment_key, {})[comment_val] = ' # ' + comment
371-
else:
372-
self.comments['inline'][comment_key] = ' # ' + comment
470+
# inline comment for item of iterable value
471+
comment_key = before_comment.rstrip()
472+
self.comments['iterinline'].setdefault(last_param_key, {})[comment_key] = ' ' + comment
473+
474+
self.log.debug("Extracted comments:\n%s", pprint.pformat(self.comments, width=120))
373475

374476

375477
def retrieve_blocks_in_spec(spec, only_blocks, silent=False):

0 commit comments

Comments
 (0)