3131:author: Kenneth Hoste (Ghent University)
3232"""
3333import os
34+ import pprint
3435import re
3536import tempfile
3637
@@ -150,12 +151,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
150151
151152 elif outer :
152153 # only reformat outer (iterable) values for (too) long lines (or for select parameters)
153- if isinstance (param_val , (list , tuple , dict )) and ((len (param_val ) > 1 and line_too_long ) or forced ):
154+ if isinstance (param_val , (list , tuple , dict )) and ((len (param_val ) > 1 or line_too_long ) or forced ):
154155
155- item_tmpl = INDENT_4SPACES + '%(item)s,%(comment)s\n '
156+ item_tmpl = INDENT_4SPACES + '%(item)s,%(inline_comment)s\n '
157+
158+ start_char , end_char = param_strval [0 ], param_strval [- 1 ]
156159
157160 # start with opening character: [, (, {
158- res = '%s\n ' % param_strval [ 0 ]
161+ res = '%s\n ' % start_char
159162
160163 # add items one-by-one, special care for dict values (order of keys, different format for elements)
161164 if isinstance (param_val , dict ):
@@ -166,18 +169,32 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
166169 else :
167170 raise EasyBuildError ("Missing mandatory key '%s' in %s." , item_key , param_name )
168171
169- comment = self ._get_item_comments (param_name , item_val ).get (str (item_val ), '' )
172+ item_comments = self ._get_item_comments (param_name , item_val )
173+
174+ inline_comment = item_comments .get ('inline' , '' )
175+ item_tmpl_dict = {'inline_comment' : inline_comment }
176+
177+ for comment in item_comments .get ('above' , []):
178+ res += INDENT_4SPACES + comment + '\n '
179+
170180 key_pref = quote_py_str (item_key ) + ': '
171- addlen = addlen + len (INDENT_4SPACES ) + len (key_pref ) + len (comment )
181+ addlen = addlen + len (INDENT_4SPACES ) + len (key_pref ) + len (inline_comment )
172182 formatted_item_val = self ._reformat_line (param_name , item_val , addlen = addlen )
173- res += item_tmpl % {
174- 'comment' : comment ,
175- 'item' : key_pref + formatted_item_val ,
176- }
183+ item_tmpl_dict [ 'item' ] = key_pref + formatted_item_val
184+
185+ res += item_tmpl % item_tmpl_dict
186+
177187 else : # list, tuple
178188 for item in param_val :
179- comment = self ._get_item_comments (param_name , item ).get (str (item ), '' )
180- addlen = addlen + len (INDENT_4SPACES ) + len (comment )
189+ item_comments = self ._get_item_comments (param_name , item )
190+
191+ inline_comment = item_comments .get ('inline' , '' )
192+ item_tmpl_dict = {'inline_comment' : inline_comment }
193+
194+ for comment in item_comments .get ('above' , []):
195+ res += INDENT_4SPACES + comment + '\n '
196+
197+ addlen = addlen + len (INDENT_4SPACES ) + len (inline_comment )
181198 # the tuples are really strings here that are constructed from the dependency dicts
182199 # so for a plain list of builddependencies param_val is a list of strings here;
183200 # and for iterated builddependencies it is a list of lists of strings
@@ -188,14 +205,20 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
188205 for subitem in item ]) + ']'
189206 else :
190207 itemstr = self ._reformat_line (param_name , item , addlen = addlen )
208+ item_tmpl_dict ['item' ] = itemstr
209+
210+ res += item_tmpl % item_tmpl_dict
191211
192- res += item_tmpl % {
193- 'comment' : comment ,
194- 'item' : itemstr
195- }
212+ # take into account possible closing comments
213+ # see https://github.com/easybuilders/easybuild-framework/issues/3082
214+ end_comments = self ._get_item_comments (param_name , end_char )
215+ for comment in end_comments .get ('above' , []):
216+ res += INDENT_4SPACES + comment + '\n '
196217
197- # end with closing character: ], ), }
198- res += param_strval [- 1 ]
218+ # end with closing character (']', ')', '}'), incl. possible inline comment
219+ res += end_char
220+ if 'inline' in end_comments :
221+ res += end_comments ['inline' ]
199222
200223 else :
201224 # dependencies are already dumped as strings, so they do not need to be quoted again
@@ -207,9 +230,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
207230 def _get_item_comments (self , key , val ):
208231 """Get per-item comments for specified parameter name/value."""
209232 item_comments = {}
210- for comment_key , comment_val in self .comments ['iter' ].get (key , {}).items ():
233+
234+ for comment_key , comment_val in self .comments ['iterabove' ].get (key , {}).items ():
235+ if str (val ) in comment_key :
236+ item_comments ['above' ] = comment_val
237+
238+ for comment_key , comment_val in self .comments ['iterinline' ].get (key , {}).items ():
211239 if str (val ) in comment_key :
212- item_comments [str ( val ) ] = comment_val
240+ item_comments ['inline' ] = comment_val
213241
214242 return item_comments
215243
@@ -312,64 +340,134 @@ def extract_comments(self, rawtxt):
312340 Inline comments on items of iterable values are also extracted.
313341 """
314342 self .comments = {
315- 'above' : {}, # comments for a particular parameter definition
343+ 'above' : {}, # comments above a parameter definition
316344 'header' : [], # header comment lines
317345 'inline' : {}, # inline comments
318- 'iter' : {}, # (inline) comments on elements of iterable values
319- 'tail' : [],
346+ 'iterabove' : {}, # comment above elements of iterable values
347+ 'iterinline' : {}, # inline comments on elements of iterable values
348+ 'tail' : [], # comment at the end of the easyconfig file
320349 }
321350
322- rawlines = rawtxt . split ( ' \n ' )
351+ parsed_ec = self . get_config_dict ( )
323352
324- # extract header first
325- while rawlines and rawlines [0 ].startswith ('#' ):
326- self .comments ['header' ].append (rawlines .pop (0 ))
353+ param_def_regex = re .compile ('^([a-z__0-9]+)\s*=' )
354+ whitespace_regex = re .compile ('^\s*$' )
327355
328- parsed_ec = self .get_config_dict ()
356+ def clean_part (part ):
357+ """Helper function to strip off trailing whitespace + trailing quotes."""
358+ return part .rstrip ().rstrip ("'" ).rstrip ('"' )
359+
360+ def split_on_comment_hash (line , param_key ):
361+ """Helper function to split line on first (actual) comment character '#'."""
362+
363+ # string representation of easyconfig parameter value,
364+ # used to check is supposed comment isn't actual part of the parameter value
365+ # (and thus not actually a comment at all)
366+ param_strval = str (parsed_ec .get (param_key ))
367+
368+ parts = line .split ('#' )
369+
370+ # first part (before first #) is definitely not part of comment
371+ before_comment = parts .pop (0 )
329372
373+ # strip out parts that look like a comment but are actually part of a parameter value
374+ while parts and ('#' + clean_part (parts [0 ])) in param_strval :
375+ before_comment += '#' + parts .pop (0 )
376+
377+ comment = '#' .join (parts )
378+
379+ return before_comment , comment .strip ()
380+
381+ def grab_more_comment_lines (lines , indent , param_key ):
382+ """Grab more comment lines that match specified indent."""
383+
384+ comment_lines = []
385+
386+ while lines and (lines [0 ].startswith (indent + '#' ) or whitespace_regex .match (lines [0 ])):
387+ line = lines .pop (0 )
388+ _ , actual_comment = split_on_comment_hash (line , param_key )
389+ # prefix comment with '#' unless line was empty
390+ if line :
391+ actual_comment = '# ' + actual_comment
392+ comment_lines .append (actual_comment .strip ())
393+
394+ return comment_lines
395+
396+ rawlines = rawtxt .split ('\n ' )
397+
398+ # extract header first (include empty lines too)
399+ self .comments ['header' ] = grab_more_comment_lines (rawlines , '' , None )
400+
401+ last_param_key = None
330402 while rawlines :
331403 rawline = rawlines .pop (0 )
404+
405+ # keep track of last parameter definition we ran into
406+ res = param_def_regex .match (rawline )
407+ if res :
408+ key = res .group (1 )
409+ if key in parsed_ec :
410+ last_param_key = key
411+
412+ if last_param_key :
413+ before_comment , inline_comment = split_on_comment_hash (rawline , last_param_key )
414+
415+ # short-circuit to next line in case there are no actual comments on this (non-empty) line
416+ if before_comment and not inline_comment :
417+ continue
418+
419+ # lines that start with a hash indicate (start of a block of) comment line(s)
332420 if rawline .startswith ('#' ):
333- comment = []
334- # comment could be multi-line
335- while rawline is not None and ( rawline . startswith ( '#' ) or not rawline ) :
336- # drop empty lines (that don't even include a #)
337- if rawline :
338- comment . append ( rawline )
339- # grab next line ( if more lines are left)
340- if rawlines :
341- rawline = rawlines . pop ( 0 )
421+ comment = [rawline ] + grab_more_comment_lines ( rawlines , '' , last_param_key )
422+
423+ if rawlines :
424+ # try to pin comment to parameter definition below it
425+ # don't consume the line yet though, it may also include inline comments...
426+ res = param_def_regex . match ( rawlines [ 0 ] )
427+ if res :
428+ last_param_key = res . group ( 1 )
429+ self . comments [ 'above' ][ last_param_key ] = comment
342430 else :
343- rawline = None
344-
345- if rawline is None :
346- self .comments ['tail' ] = comment
431+ # if the comment is not above a parameter definition,
432+ # just use the whole next line to determine where the comment belongs...
433+ self .comments ['above' ][rawlines [0 ]] = comment
347434 else :
348- key = rawline .split ('=' , 1 )[0 ].strip ()
349- self .comments ['above' ][key ] = comment
350-
351- elif '#' in rawline : # inline comment
352- comment_key , comment_val = None , None
353- comment = rawline .rsplit ('#' , 1 )[1 ].strip ()
354- # check whether this line is parameter definition;
355- # if not, assume it's a continuation of a multi-line value
356- if re .match (r'^[a-z_]+\s*=' , rawline ):
357- comment_key = rawline .split ('=' , 1 )[0 ].strip ()
435+ # if there are no more lines, the comment (block) is at the tail
436+ self .comments ['tail' ] = comment
437+
438+ elif '#' in rawline :
439+ # if there's a hash character elsewhere in the line (not at the start),
440+ # there are a couple of possibilities:
441+ # - inline comment for a parameter definition (at the end of a non-empty line)
442+ # - indented comment for an item value of an iterable easyconfig parameter (list, dict, ...)
443+ # - inline comment for an item value of an iterable easyconfig parameter
444+
445+ before_comment , comment = split_on_comment_hash (rawline , last_param_key )
446+ comment = ('# ' + comment ).rstrip ()
447+
448+ # first check whether current line is an easyconfig parameter definition
449+ # if so, the comment is an inline comment
450+ if param_def_regex .match (before_comment ):
451+ self .comments ['inline' ][last_param_key ] = ' ' + comment
452+
453+ # if there's only whitespace before the comment,
454+ # then we have an indented comment, and we need to figure out for what exactly
455+ elif whitespace_regex .match (before_comment ):
456+ # first consume possible additional comment lines with same indentation
457+ comment = [comment ] + grab_more_comment_lines (rawlines , before_comment , last_param_key )
458+
459+ before_comment , inline_comment = split_on_comment_hash (rawlines .pop (0 ), last_param_key )
460+ comment_key = before_comment .rstrip ()
461+ self .comments ['iterabove' ].setdefault (last_param_key , {})[comment_key ] = comment
462+ if inline_comment :
463+ inline_comment = (' # ' + inline_comment ).rstrip ()
464+ self .comments ['iterinline' ].setdefault (last_param_key , {})[comment_key ] = inline_comment
358465 else :
359- # determine parameter value where the item value on this line is a part of
360- for key , val in parsed_ec .items ():
361- item_val = re .sub (r',$' , r'' , rawline .rsplit ('#' , 1 )[0 ].strip ())
362- if not isinstance (val , string_type ) and item_val in str (val ):
363- comment_key , comment_val = key , item_val
364- break
365-
366- # check if hash actually indicated a comment; or is part of the value
367- if comment_key in parsed_ec :
368- if comment .replace ("'" , '' ).replace ('"' , '' ) not in str (parsed_ec [comment_key ]):
369- if comment_val :
370- self .comments ['iter' ].setdefault (comment_key , {})[comment_val ] = ' # ' + comment
371- else :
372- self .comments ['inline' ][comment_key ] = ' # ' + comment
466+ # inline comment for item of iterable value
467+ comment_key = before_comment .rstrip ()
468+ self .comments ['iterinline' ].setdefault (last_param_key , {})[comment_key ] = ' ' + comment
469+
470+ self .log .debug ("Extracted comments:\n %s" , pprint .pformat (self .comments , width = 120 ))
373471
374472
375473def retrieve_blocks_in_spec (spec , only_blocks , silent = False ):
0 commit comments