correctly handle weirdly indented comments (and restore them with correct indent in dumped easyconfig)

boegel · boegel · commit 43238704a00b · 2019-11-24T14:37:48.000+01:00
diff --git a/easybuild/framework/easyconfig/format/one.py b/easybuild/framework/easyconfig/format/one.py
@@ -231,7 +231,10 @@ def _get_item_comments(self, key, val):
         """Get per-item comments for specified parameter name/value."""
         item_comments = {}
 
-        for comment_key, comment_val in self.comments['iterabove'].get(key, {}).items():
+        cand_above_comments = self.comments['iterabove'].get(key, {}).items()
+        cand_above_comments.extend(self.comments['above'].items())
+
+        for comment_key, comment_val in cand_above_comments:
             if str(val) in comment_key:
                 item_comments['above'] = comment_val
 
@@ -350,6 +353,7 @@ def extract_comments(self, rawtxt):
 
         parsed_ec = self.get_config_dict()
 
+        comment_regex = re.compile(r'^\s*#')
         param_def_regex = re.compile(r'^([a-z_0-9]+)\s*=')
         whitespace_regex = re.compile(r'^\s*$')
 
@@ -378,12 +382,12 @@ def split_on_comment_hash(line, param_key):
 
             return before_comment, comment.strip()
 
-        def grab_more_comment_lines(lines, indent, param_key):
-            """Grab more comment lines that match specified indent."""
+        def grab_more_comment_lines(lines, param_key):
+            """Grab more comment lines."""
 
             comment_lines = []
 
-            while lines and (lines[0].startswith(indent + '#') or whitespace_regex.match(lines[0])):
+            while lines and (comment_regex.match(lines[0]) or whitespace_regex.match(lines[0])):
                 line = lines.pop(0)
                 _, actual_comment = split_on_comment_hash(line, param_key)
                 # prefix comment with '#' unless line was empty
@@ -396,7 +400,7 @@ def grab_more_comment_lines(lines, indent, param_key):
         rawlines = rawtxt.split('\n')
 
         # extract header first (include empty lines too)
-        self.comments['header'] = grab_more_comment_lines(rawlines, '', None)
+        self.comments['header'] = grab_more_comment_lines(rawlines, None)
 
         last_param_key = None
         while rawlines:
@@ -418,7 +422,7 @@ def grab_more_comment_lines(lines, indent, param_key):
 
             # lines that start with a hash indicate (start of a block of) comment line(s)
             if rawline.startswith('#'):
-                comment = [rawline] + grab_more_comment_lines(rawlines, '', last_param_key)
+                comment = [rawline] + grab_more_comment_lines(rawlines, last_param_key)
 
                 if rawlines:
                     # try to pin comment to parameter definition below it
@@ -430,7 +434,8 @@ def grab_more_comment_lines(lines, indent, param_key):
                     else:
                         # if the comment is not above a parameter definition,
                         # just use the whole next line to determine where the comment belongs...
-                        self.comments['above'][rawlines[0]] = comment
+                        before_comment, _ = split_on_comment_hash(rawlines[0], last_param_key)
+                        self.comments['above'][before_comment.rstrip()] = comment
                 else:
                     # if there are no more lines, the comment (block) is at the tail
                     self.comments['tail'] = comment
@@ -454,7 +459,7 @@ def grab_more_comment_lines(lines, indent, param_key):
                 # then we have an indented comment, and we need to figure out for what exactly
                 elif whitespace_regex.match(before_comment):
                     # first consume possible additional comment lines with same indentation
-                    comment = [comment] + grab_more_comment_lines(rawlines, before_comment, last_param_key)
+                    comment = [comment] + grab_more_comment_lines(rawlines, last_param_key)
 
                     before_comment, inline_comment = split_on_comment_hash(rawlines.pop(0), last_param_key)
                     comment_key = before_comment.rstrip()
diff --git a/test/framework/easyconfig.py b/test/framework/easyconfig.py
@@ -1899,6 +1899,7 @@ def test_dump_comments(self):
             "# this is a header",
             "#",
             "# which may include empty comment lines",
+            "    # weirdly indented lines",
             '',
             "# or flat out empty lines",
             '',
@@ -1925,6 +1926,7 @@ def test_dump_comments(self):
             "source_urls = [",
             "    # first possible source URL",
             "    'https://example.com',",
+            "# annoying non-indented comment",
             "    'https://anotherexample.com',  # fallback URL",
             "]",
             '',
@@ -1933,7 +1935,8 @@ def test_dump_comments(self):
             "# multi > 3",
             "dependencies = [",
             "    # this dependency",
-            "    # has multiple lines above it",
+            "# has multiple lines above it",
+            "    # some of which without proper indentation...",
             "    ('foo', '1.2.3'),  # and an inline comment too",
             "    ('nocomment', '4.5'),",
             "    # last dependency, I promise",
@@ -1961,6 +1964,7 @@ def test_dump_comments(self):
 
         # check internal structure to keep track of comments
         self.assertEqual(ec.parser._formatter.comments['above'], {
+           "    'https://anotherexample.com',": ['# annoying non-indented comment'],
            'dependencies': [
                '# this is a multiline comment above dependencies',
                '# I said multiline',
@@ -1977,6 +1981,7 @@ def test_dump_comments(self):
             '# this is a header',
             '#',
             '# which may include empty comment lines',
+            '# weirdly indented lines',
             '',
             '# or flat out empty lines',
             '',
@@ -1988,7 +1993,8 @@ def test_dump_comments(self):
         self.assertEqual(ec.parser._formatter.comments['iterabove'], {
             'dependencies': {
                 "    ('foo', '1.2.3'),": ['# this dependency',
-                                          '# has multiple lines above it'],
+                                          '# has multiple lines above it',
+                                          "# some of which without proper indentation..."],
                 "    ('last', '1.2.3'),": ['# last dependency, I promise'],
                 ']': ['# trailing comments in dependencies', '# a bit weird, but it happens'],
             },
@@ -2029,6 +2035,7 @@ def test_dump_comments(self):
             '# this is a header',
             '#',
             '# which may include empty comment lines',
+            '# weirdly indented lines',
             '',
             '# or flat out empty lines',
             '',
@@ -2081,6 +2088,7 @@ def test_dump_comments(self):
                 r'',
                 r"    # this dependency",
                 r"    # has multiple lines above it",
+                r"    # some of which without proper indentation\.\.\.",
                 r"    \('foo', '1\.2\.3'\),  # and an inline comment too",
             ]),
             '\n'.join([
@@ -2103,6 +2111,11 @@ def test_dump_comments(self):
                 r"    # first possible source URL",
                 r"    'https://example\.com',",
             ]),
+            '\n'.join([
+                '',
+                r"    # annoying non-indented comment",
+                r"    'https://anotherexample\.com',  # fallback URL",
+            ]),
         ]
         for pattern in patterns:
             regex = re.compile(pattern, re.M)