Skip to content

Commit 4e907ca

Browse files
committed
fix various issues in extracting comments from original easyconfig file and including them again in dumped easyconfig
1 parent 6d593b0 commit 4e907ca

File tree

2 files changed

+405
-66
lines changed

2 files changed

+405
-66
lines changed

easybuild/framework/easyconfig/format/one.py

Lines changed: 162 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
:author: Kenneth Hoste (Ghent University)
3232
"""
3333
import os
34+
import pprint
3435
import re
3536
import tempfile
3637

@@ -150,12 +151,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
150151

151152
elif outer:
152153
# only reformat outer (iterable) values for (too) long lines (or for select parameters)
153-
if isinstance(param_val, (list, tuple, dict)) and ((len(param_val) > 1 and line_too_long) or forced):
154+
if isinstance(param_val, (list, tuple, dict)) and ((len(param_val) > 1 or line_too_long) or forced):
154155

155-
item_tmpl = INDENT_4SPACES + '%(item)s,%(comment)s\n'
156+
item_tmpl = INDENT_4SPACES + '%(item)s,%(inline_comment)s\n'
157+
158+
start_char, end_char = param_strval[0], param_strval[-1]
156159

157160
# start with opening character: [, (, {
158-
res = '%s\n' % param_strval[0]
161+
res = '%s\n' % start_char
159162

160163
# add items one-by-one, special care for dict values (order of keys, different format for elements)
161164
if isinstance(param_val, dict):
@@ -166,18 +169,32 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
166169
else:
167170
raise EasyBuildError("Missing mandatory key '%s' in %s.", item_key, param_name)
168171

169-
comment = self._get_item_comments(param_name, item_val).get(str(item_val), '')
172+
item_comments = self._get_item_comments(param_name, item_val)
173+
174+
inline_comment = item_comments.get('inline', '')
175+
item_tmpl_dict = {'inline_comment': inline_comment}
176+
177+
for comment in item_comments.get('above', []):
178+
res += INDENT_4SPACES + comment + '\n'
179+
170180
key_pref = quote_py_str(item_key) + ': '
171-
addlen = addlen + len(INDENT_4SPACES) + len(key_pref) + len(comment)
181+
addlen = addlen + len(INDENT_4SPACES) + len(key_pref) + len(inline_comment)
172182
formatted_item_val = self._reformat_line(param_name, item_val, addlen=addlen)
173-
res += item_tmpl % {
174-
'comment': comment,
175-
'item': key_pref + formatted_item_val,
176-
}
183+
item_tmpl_dict['item'] = key_pref + formatted_item_val
184+
185+
res += item_tmpl % item_tmpl_dict
186+
177187
else: # list, tuple
178188
for item in param_val:
179-
comment = self._get_item_comments(param_name, item).get(str(item), '')
180-
addlen = addlen + len(INDENT_4SPACES) + len(comment)
189+
item_comments = self._get_item_comments(param_name, item)
190+
191+
inline_comment = item_comments.get('inline', '')
192+
item_tmpl_dict = {'inline_comment': inline_comment}
193+
194+
for comment in item_comments.get('above', []):
195+
res += INDENT_4SPACES + comment + '\n'
196+
197+
addlen = addlen + len(INDENT_4SPACES) + len(inline_comment)
181198
# the tuples are really strings here that are constructed from the dependency dicts
182199
# so for a plain list of builddependencies param_val is a list of strings here;
183200
# and for iterated builddependencies it is a list of lists of strings
@@ -188,14 +205,20 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
188205
for subitem in item]) + ']'
189206
else:
190207
itemstr = self._reformat_line(param_name, item, addlen=addlen)
208+
item_tmpl_dict['item'] = itemstr
209+
210+
res += item_tmpl % item_tmpl_dict
191211

192-
res += item_tmpl % {
193-
'comment': comment,
194-
'item': itemstr
195-
}
212+
# take into account possible closing comments
213+
# see https://github.com/easybuilders/easybuild-framework/issues/3082
214+
end_comments = self._get_item_comments(param_name, end_char)
215+
for comment in end_comments.get('above', []):
216+
res += INDENT_4SPACES + comment + '\n'
196217

197-
# end with closing character: ], ), }
198-
res += param_strval[-1]
218+
# end with closing character (']', ')', '}'), incl. possible inline comment
219+
res += end_char
220+
if 'inline' in end_comments:
221+
res += end_comments['inline']
199222

200223
else:
201224
# dependencies are already dumped as strings, so they do not need to be quoted again
@@ -207,9 +230,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
207230
def _get_item_comments(self, key, val):
208231
"""Get per-item comments for specified parameter name/value."""
209232
item_comments = {}
210-
for comment_key, comment_val in self.comments['iter'].get(key, {}).items():
233+
234+
for comment_key, comment_val in self.comments['iterabove'].get(key, {}).items():
235+
if str(val) in comment_key:
236+
item_comments['above'] = comment_val
237+
238+
for comment_key, comment_val in self.comments['iterinline'].get(key, {}).items():
211239
if str(val) in comment_key:
212-
item_comments[str(val)] = comment_val
240+
item_comments['inline'] = comment_val
213241

214242
return item_comments
215243

@@ -312,64 +340,134 @@ def extract_comments(self, rawtxt):
312340
Inline comments on items of iterable values are also extracted.
313341
"""
314342
self.comments = {
315-
'above': {}, # comments for a particular parameter definition
343+
'above': {}, # comments above a parameter definition
316344
'header': [], # header comment lines
317345
'inline': {}, # inline comments
318-
'iter': {}, # (inline) comments on elements of iterable values
319-
'tail': [],
346+
'iterabove': {}, # comment above elements of iterable values
347+
'iterinline': {}, # inline comments on elements of iterable values
348+
'tail': [], # comment at the end of the easyconfig file
320349
}
321350

322-
rawlines = rawtxt.split('\n')
351+
parsed_ec = self.get_config_dict()
323352

324-
# extract header first
325-
while rawlines and rawlines[0].startswith('#'):
326-
self.comments['header'].append(rawlines.pop(0))
353+
param_def_regex = re.compile('^([a-z__0-9]+)\s*=')
354+
whitespace_regex = re.compile('^\s*$')
327355

328-
parsed_ec = self.get_config_dict()
356+
def clean_part(part):
357+
"""Helper function to strip off trailing whitespace + trailing quotes."""
358+
return part.rstrip().rstrip("'").rstrip('"')
359+
360+
def split_on_comment_hash(line, param_key):
361+
"""Helper function to split line on first (actual) comment character '#'."""
362+
363+
# string representation of easyconfig parameter value,
364+
# used to check whether a supposed comment is actually part of the parameter value
365+
# (and thus not actually a comment at all)
366+
param_strval = str(parsed_ec.get(param_key))
367+
368+
parts = line.split('#')
369+
370+
# first part (before first #) is definitely not part of comment
371+
before_comment = parts.pop(0)
329372

373+
# strip out parts that look like a comment but are actually part of a parameter value
374+
while parts and ('#' + clean_part(parts[0])) in param_strval:
375+
before_comment += '#' + parts.pop(0)
376+
377+
comment = '#'.join(parts)
378+
379+
return before_comment, comment.strip()
380+
381+
def grab_more_comment_lines(lines, indent, param_key):
382+
"""Grab more comment lines that match specified indent."""
383+
384+
comment_lines = []
385+
386+
while lines and (lines[0].startswith(indent + '#') or whitespace_regex.match(lines[0])):
387+
line = lines.pop(0)
388+
_, actual_comment = split_on_comment_hash(line, param_key)
389+
# prefix comment with '#' unless line was empty
390+
if line:
391+
actual_comment = '# ' + actual_comment
392+
comment_lines.append(actual_comment.strip())
393+
394+
return comment_lines
395+
396+
rawlines = rawtxt.split('\n')
397+
398+
# extract header first (include empty lines too)
399+
self.comments['header'] = grab_more_comment_lines(rawlines, '', None)
400+
401+
last_param_key = None
330402
while rawlines:
331403
rawline = rawlines.pop(0)
404+
405+
# keep track of last parameter definition we ran into
406+
res = param_def_regex.match(rawline)
407+
if res:
408+
key = res.group(1)
409+
if key in parsed_ec:
410+
last_param_key = key
411+
412+
if last_param_key:
413+
before_comment, inline_comment = split_on_comment_hash(rawline, last_param_key)
414+
415+
# short-circuit to next line in case there are no actual comments on this (non-empty) line
416+
if before_comment and not inline_comment:
417+
continue
418+
419+
# lines that start with a hash indicate (start of a block of) comment line(s)
332420
if rawline.startswith('#'):
333-
comment = []
334-
# comment could be multi-line
335-
while rawline is not None and (rawline.startswith('#') or not rawline):
336-
# drop empty lines (that don't even include a #)
337-
if rawline:
338-
comment.append(rawline)
339-
# grab next line (if more lines are left)
340-
if rawlines:
341-
rawline = rawlines.pop(0)
421+
comment = [rawline] + grab_more_comment_lines(rawlines, '', last_param_key)
422+
423+
if rawlines:
424+
# try to pin comment to parameter definition below it
425+
# don't consume the line yet though, it may also include inline comments...
426+
res = param_def_regex.match(rawlines[0])
427+
if res:
428+
last_param_key = res.group(1)
429+
self.comments['above'][last_param_key] = comment
342430
else:
343-
rawline = None
344-
345-
if rawline is None:
346-
self.comments['tail'] = comment
431+
# if the comment is not above a parameter definition,
432+
# just use the whole next line to determine where the comment belongs...
433+
self.comments['above'][rawlines[0]] = comment
347434
else:
348-
key = rawline.split('=', 1)[0].strip()
349-
self.comments['above'][key] = comment
350-
351-
elif '#' in rawline: # inline comment
352-
comment_key, comment_val = None, None
353-
comment = rawline.rsplit('#', 1)[1].strip()
354-
# check whether this line is parameter definition;
355-
# if not, assume it's a continuation of a multi-line value
356-
if re.match(r'^[a-z_]+\s*=', rawline):
357-
comment_key = rawline.split('=', 1)[0].strip()
435+
# if there are no more lines, the comment (block) is at the tail
436+
self.comments['tail'] = comment
437+
438+
elif '#' in rawline:
439+
# if there's a hash character elsewhere in the line (not at the start),
440+
# there are a couple of possibilities:
441+
# - inline comment for a parameter definition (at the end of a non-empty line)
442+
# - indented comment for an item value of an iterable easyconfig parameter (list, dict, ...)
443+
# - inline comment for an item value of an iterable easyconfig parameter
444+
445+
before_comment, comment = split_on_comment_hash(rawline, last_param_key)
446+
comment = ('# ' + comment).rstrip()
447+
448+
# first check whether current line is an easyconfig parameter definition
449+
# if so, the comment is an inline comment
450+
if param_def_regex.match(before_comment):
451+
self.comments['inline'][last_param_key] = ' ' + comment
452+
453+
# if there's only whitespace before the comment,
454+
# then we have an indented comment, and we need to figure out for what exactly
455+
elif whitespace_regex.match(before_comment):
456+
# first consume possible additional comment lines with same indentation
457+
comment = [comment] + grab_more_comment_lines(rawlines, before_comment, last_param_key)
458+
459+
before_comment, inline_comment = split_on_comment_hash(rawlines.pop(0), last_param_key)
460+
comment_key = before_comment.rstrip()
461+
self.comments['iterabove'].setdefault(last_param_key, {})[comment_key] = comment
462+
if inline_comment:
463+
inline_comment = (' # ' + inline_comment).rstrip()
464+
self.comments['iterinline'].setdefault(last_param_key, {})[comment_key] = inline_comment
358465
else:
359-
# determine parameter value where the item value on this line is a part of
360-
for key, val in parsed_ec.items():
361-
item_val = re.sub(r',$', r'', rawline.rsplit('#', 1)[0].strip())
362-
if not isinstance(val, string_type) and item_val in str(val):
363-
comment_key, comment_val = key, item_val
364-
break
365-
366-
# check if hash actually indicated a comment; or is part of the value
367-
if comment_key in parsed_ec:
368-
if comment.replace("'", '').replace('"', '') not in str(parsed_ec[comment_key]):
369-
if comment_val:
370-
self.comments['iter'].setdefault(comment_key, {})[comment_val] = ' # ' + comment
371-
else:
372-
self.comments['inline'][comment_key] = ' # ' + comment
466+
# inline comment for item of iterable value
467+
comment_key = before_comment.rstrip()
468+
self.comments['iterinline'].setdefault(last_param_key, {})[comment_key] = ' ' + comment
469+
470+
self.log.debug("Extracted comments:\n%s", pprint.pformat(self.comments, width=120))
373471

374472

375473
def retrieve_blocks_in_spec(spec, only_blocks, silent=False):

0 commit comments

Comments
 (0)