3131:author: Kenneth Hoste (Ghent University)
3232"""
3333import os
34+ import pprint
3435import re
3536import tempfile
3637
@@ -150,12 +151,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
150151
151152 elif outer :
152153 # only reformat outer (iterable) values for (too) long lines (or for select parameters)
153- if isinstance (param_val , (list , tuple , dict )) and ((len (param_val ) > 1 and line_too_long ) or forced ):
154+ if isinstance (param_val , (list , tuple , dict )) and ((len (param_val ) > 1 or line_too_long ) or forced ):
154155
155- item_tmpl = INDENT_4SPACES + '%(item)s,%(comment)s\n '
156+ item_tmpl = INDENT_4SPACES + '%(item)s,%(inline_comment)s\n '
157+
158+ start_char , end_char = param_strval [0 ], param_strval [- 1 ]
156159
157160 # start with opening character: [, (, {
158- res = '%s\n ' % param_strval [ 0 ]
161+ res = '%s\n ' % start_char
159162
160163 # add items one-by-one, special care for dict values (order of keys, different format for elements)
161164 if isinstance (param_val , dict ):
@@ -166,18 +169,32 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
166169 else :
167170 raise EasyBuildError ("Missing mandatory key '%s' in %s." , item_key , param_name )
168171
169- comment = self ._get_item_comments (param_name , item_val ).get (str (item_val ), '' )
172+ item_comments = self ._get_item_comments (param_name , item_val )
173+
174+ inline_comment = item_comments .get ('inline' , '' )
175+ item_tmpl_dict = {'inline_comment' : inline_comment }
176+
177+ for comment in item_comments .get ('above' , []):
178+ res += INDENT_4SPACES + comment + '\n '
179+
170180 key_pref = quote_py_str (item_key ) + ': '
171- addlen = addlen + len (INDENT_4SPACES ) + len (key_pref ) + len (comment )
181+ addlen = addlen + len (INDENT_4SPACES ) + len (key_pref ) + len (inline_comment )
172182 formatted_item_val = self ._reformat_line (param_name , item_val , addlen = addlen )
173- res += item_tmpl % {
174- 'comment' : comment ,
175- 'item' : key_pref + formatted_item_val ,
176- }
183+ item_tmpl_dict [ 'item' ] = key_pref + formatted_item_val
184+
185+ res += item_tmpl % item_tmpl_dict
186+
177187 else : # list, tuple
178188 for item in param_val :
179- comment = self ._get_item_comments (param_name , item ).get (str (item ), '' )
180- addlen = addlen + len (INDENT_4SPACES ) + len (comment )
189+ item_comments = self ._get_item_comments (param_name , item )
190+
191+ inline_comment = item_comments .get ('inline' , '' )
192+ item_tmpl_dict = {'inline_comment' : inline_comment }
193+
194+ for comment in item_comments .get ('above' , []):
195+ res += INDENT_4SPACES + comment + '\n '
196+
197+ addlen = addlen + len (INDENT_4SPACES ) + len (inline_comment )
181198 # the tuples are really strings here that are constructed from the dependency dicts
182199 # so for a plain list of builddependencies param_val is a list of strings here;
183200 # and for iterated builddependencies it is a list of lists of strings
@@ -188,14 +205,20 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
188205 for subitem in item ]) + ']'
189206 else :
190207 itemstr = self ._reformat_line (param_name , item , addlen = addlen )
208+ item_tmpl_dict ['item' ] = itemstr
209+
210+ res += item_tmpl % item_tmpl_dict
191211
192- res += item_tmpl % {
193- 'comment' : comment ,
194- 'item' : itemstr
195- }
212+ # take into account possible closing comments
213+ # see https://github.com/easybuilders/easybuild-framework/issues/3082
214+ end_comments = self ._get_item_comments (param_name , end_char )
215+ for comment in end_comments .get ('above' , []):
216+ res += INDENT_4SPACES + comment + '\n '
196217
197- # end with closing character: ], ), }
198- res += param_strval [- 1 ]
218+ # end with closing character (']', ')', '}'), incl. possible inline comment
219+ res += end_char
220+ if 'inline' in end_comments :
221+ res += end_comments ['inline' ]
199222
200223 else :
201224 # dependencies are already dumped as strings, so they do not need to be quoted again
@@ -207,9 +230,14 @@ def _reformat_line(self, param_name, param_val, outer=False, addlen=0):
207230 def _get_item_comments (self , key , val ):
208231 """Get per-item comments for specified parameter name/value."""
209232 item_comments = {}
210- for comment_key , comment_val in self .comments ['iter' ].get (key , {}).items ():
233+
234+ for comment_key , comment_val in self .comments ['iterabove' ].get (key , {}).items ():
235+ if str (val ) in comment_key :
236+ item_comments ['above' ] = comment_val
237+
238+ for comment_key , comment_val in self .comments ['iterinline' ].get (key , {}).items ():
211239 if str (val ) in comment_key :
212- item_comments [str ( val ) ] = comment_val
240+ item_comments ['inline' ] = comment_val
213241
214242 return item_comments
215243
@@ -312,64 +340,138 @@ def extract_comments(self, rawtxt):
312340 Inline comments on items of iterable values are also extracted.
313341 """
314342 self .comments = {
315- 'above' : {}, # comments for a particular parameter definition
343+ 'above' : {}, # comments above a parameter definition
316344 'header' : [], # header comment lines
317345 'inline' : {}, # inline comments
318- 'iter' : {}, # (inline) comments on elements of iterable values
319- 'tail' : [],
346+ 'iterabove' : {}, # comment above elements of iterable values
347+ 'iterinline' : {}, # inline comments on elements of iterable values
348+ 'tail' : [], # comment at the end of the easyconfig file
320349 }
321350
322- rawlines = rawtxt . split ( ' \n ' )
351+ parsed_ec = self . get_config_dict ( )
323352
324- # extract header first
325- while rawlines and rawlines [ 0 ]. startswith ( '#' ):
326- self . comments [ 'header' ]. append ( rawlines . pop ( 0 ) )
353+ comment_regex = re . compile ( r'^\s*#' )
354+ param_def_regex = re . compile ( r'^([a-z_0-9]+)\s*=' )
355+ whitespace_regex = re . compile ( r'^\s*$' )
327356
328- parsed_ec = self .get_config_dict ()
def clean_part(part):
    """Helper function to strip off trailing whitespace + trailing quotes.

    Trailing whitespace is removed first, then any run of trailing single
    quotes, then any run of trailing double quotes (in that order).
    """
    return part.rstrip().rstrip("'").rstrip('"')
360+
def split_on_comment_hash(line, param_key):
    """Helper function to split line on first (actual) comment character '#'.

    :param line: raw line to split
    :param param_key: name of the parameter whose parsed value is consulted to
                      tell real comments apart from '#' characters in the value
    :return: tuple (text before the comment, comment text stripped of
             surrounding whitespace and without the leading '#')
    """
    # string representation of easyconfig parameter value,
    # used to check if supposed comment isn't actual part of the parameter value
    # (and thus not actually a comment at all);
    # parsed_ec and clean_part come from the enclosing scope
    param_strval = str(parsed_ec.get(param_key))

    parts = line.split('#')

    # first part (before first #) is definitely not part of comment
    before_comment = parts.pop(0)

    # strip out parts that look like a comment but are actually part of a parameter value
    while parts and ('#' + clean_part(parts[0])) in param_strval:
        before_comment += '#' + parts.pop(0)

    # whatever is left is the comment; put back the '#' characters it contained
    comment = '#'.join(parts)

    return before_comment, comment.strip()
381+
def grab_more_comment_lines(lines, param_key):
    """Grab more comment lines.

    Consumes (pops) lines from the front of the given list for as long as they
    are comment lines or whitespace-only lines.

    :param lines: list of remaining raw lines; modified in place
    :param param_key: parameter name passed through to split_on_comment_hash
    :return: list of consumed lines, with comment lines normalised to start
             with '# ' and whitespace-only lines represented as empty strings
    """
    comment_lines = []

    # comment_regex, whitespace_regex and split_on_comment_hash come from the enclosing scope
    while lines and (comment_regex.match(lines[0]) or whitespace_regex.match(lines[0])):
        line = lines.pop(0)
        _, actual_comment = split_on_comment_hash(line, param_key)
        # prefix comment with '#' unless line was empty
        if line.strip():
            actual_comment = '# ' + actual_comment
        comment_lines.append(actual_comment.strip())

    return comment_lines
396+
397+ rawlines = rawtxt .split ('\n ' )
398+
399+ # extract header first (include empty lines too)
400+ self .comments ['header' ] = grab_more_comment_lines (rawlines , None )
401+
402+ last_param_key = None
330403 while rawlines :
331404 rawline = rawlines .pop (0 )
405+
406+ # keep track of last parameter definition we have seen,
407+ # current line may be (the start of) a parameter definition
408+ res = param_def_regex .match (rawline )
409+ if res :
410+ key = res .group (1 )
411+ if key in parsed_ec :
412+ last_param_key = key
413+
414+ if last_param_key :
415+ before_comment , inline_comment = split_on_comment_hash (rawline , last_param_key )
416+
417+ # short-circuit to next line in case there are no actual comments on this (non-empty) line
418+ if before_comment and not inline_comment :
419+ continue
420+
421+ # lines that start with a hash indicate (start of a block of) comment line(s)
332422 if rawline .startswith ('#' ):
333- comment = []
334- # comment could be multi-line
335- while rawline is not None and ( rawline . startswith ( '#' ) or not rawline ) :
336- # drop empty lines (that don't even include a #)
337- if rawline :
338- comment . append ( rawline )
339- # grab next line ( if more lines are left)
340- if rawlines :
341- rawline = rawlines . pop ( 0 )
423+ comment = [rawline ] + grab_more_comment_lines ( rawlines , last_param_key )
424+
425+ if rawlines :
426+ # try to pin comment to parameter definition below it
427+ # don't consume the line yet though, it may also include inline comments...
428+ res = param_def_regex . match ( rawlines [ 0 ] )
429+ if res :
430+ last_param_key = res . group ( 1 )
431+ self . comments [ 'above' ][ last_param_key ] = comment
342432 else :
343- rawline = None
344-
345- if rawline is None :
346- self .comments ['tail' ] = comment
433+ # if the comment is not above a parameter definition,
434+ # then it must be a comment for an item of an iterable parameter value
435+ before_comment , _ = split_on_comment_hash (rawlines [0 ], last_param_key )
436+ comment_key = before_comment .rstrip ()
437+ self .comments ['iterabove' ].setdefault (last_param_key , {})[comment_key ] = comment
347438 else :
348- key = rawline .split ('=' , 1 )[0 ].strip ()
349- self .comments ['above' ][key ] = comment
350-
351- elif '#' in rawline : # inline comment
352- comment_key , comment_val = None , None
353- comment = rawline .rsplit ('#' , 1 )[1 ].strip ()
354- # check whether this line is parameter definition;
355- # if not, assume it's a continuation of a multi-line value
356- if re .match (r'^[a-z_]+\s*=' , rawline ):
357- comment_key = rawline .split ('=' , 1 )[0 ].strip ()
439+ # if there are no more lines, the comment (block) is at the tail
440+ self .comments ['tail' ] = comment
441+
442+ elif '#' in rawline :
443+ # if there's a hash character elsewhere in the line (not at the start),
444+ # there are a couple of possibilities:
445+ # - inline comment for a parameter definition (at the end of a non-empty line)
446+ # - indented comment for an item value of an iterable easyconfig parameter (list, dict, ...)
447+ # - inline comment for an item value of an iterable easyconfig parameter
448+
449+ before_comment , comment = split_on_comment_hash (rawline , last_param_key )
450+ comment = ('# ' + comment ).rstrip ()
451+
452+ # first check whether current line is an easyconfig parameter definition
453+ # if so, the comment is an inline comment
454+ if param_def_regex .match (before_comment ):
455+ self .comments ['inline' ][last_param_key ] = ' ' + comment
456+
457+ # if there's only whitespace before the comment,
458+ # then we have an indented comment, and we need to figure out for what exactly
459+ elif whitespace_regex .match (before_comment ):
460+ # first consume possible additional comment lines with same indentation
461+ comment = [comment ] + grab_more_comment_lines (rawlines , last_param_key )
462+
463+ before_comment , inline_comment = split_on_comment_hash (rawlines .pop (0 ), last_param_key )
464+ comment_key = before_comment .rstrip ()
465+ self .comments ['iterabove' ].setdefault (last_param_key , {})[comment_key ] = comment
466+ if inline_comment :
467+ inline_comment = (' # ' + inline_comment ).rstrip ()
468+ self .comments ['iterinline' ].setdefault (last_param_key , {})[comment_key ] = inline_comment
358469 else :
359- # determine parameter value where the item value on this line is a part of
360- for key , val in parsed_ec .items ():
361- item_val = re .sub (r',$' , r'' , rawline .rsplit ('#' , 1 )[0 ].strip ())
362- if not isinstance (val , string_type ) and item_val in str (val ):
363- comment_key , comment_val = key , item_val
364- break
365-
366- # check if hash actually indicated a comment; or is part of the value
367- if comment_key in parsed_ec :
368- if comment .replace ("'" , '' ).replace ('"' , '' ) not in str (parsed_ec [comment_key ]):
369- if comment_val :
370- self .comments ['iter' ].setdefault (comment_key , {})[comment_val ] = ' # ' + comment
371- else :
372- self .comments ['inline' ][comment_key ] = ' # ' + comment
470+ # inline comment for item of iterable value
471+ comment_key = before_comment .rstrip ()
472+ self .comments ['iterinline' ].setdefault (last_param_key , {})[comment_key ] = ' ' + comment
473+
474+ self .log .debug ("Extracted comments:\n %s" , pprint .pformat (self .comments , width = 120 ))
373475
374476
375477def retrieve_blocks_in_spec (spec , only_blocks , silent = False ):
0 commit comments