improving code parsing

samuelcolvin · samuelcolvin · commit 0beeb333cb26 · 2017-08-20T21:00:47.000+01:00
diff --git a/devtools/debug.py b/devtools/debug.py
@@ -4,7 +4,7 @@
 import warnings
 from pathlib import Path
 from textwrap import dedent
-from typing import Generator, List
+from typing import Generator, List, Optional, Tuple
 
 __all__ = ['Debug', 'debug']
 CWD = Path('.').resolve()
@@ -66,6 +66,11 @@ class Debug:
     output_class = DebugOutput
     # 50 lines should be enough to make sure we always get the entire function definition
     frame_context_length = 50
+    complex_nodes = (
+        ast.Call,
+        ast.IfExp, ast.BoolOp, ast.BinOp, ast.Compare,
+        ast.DictComp, ast.ListComp, ast.SetComp, ast.GeneratorExp
+    )
 
     def __call__(self, *args, **kwargs):
         print(self._process(args, kwargs, r'debug *\('), flush=True)
@@ -88,26 +93,23 @@ def _process(self, args, kwargs, func_regex):
                 # happens if filename path is not within CWD
                 pass
 
-        call_lines = []
-        # print(call_frame)
-        # from pprint import pprint
-        # pprint(call_frame.code_context)
         if call_frame.code_context:
-            for line in range(call_frame.index, 0, -1):
-                new_line = call_frame.code_context[line]
-                call_lines.append(new_line)
-                if re.search(func_regex, new_line):
-                    break
-            call_lines.reverse()
-            lineno = call_frame.lineno - len(call_lines) + 1
+            func_ast, code_lines, lineno = self._parse_code(call_frame, func_regex, filename)
+            if func_ast:
+                arguments = list(self._process_args(func_ast, code_lines, args, kwargs))
+            else:
+                # parsing failed
+                arguments = list(self._args_inspection_failed(args, kwargs))
         else:
-            lineno = call_frame.lineno - len(call_lines)
+            lineno = call_frame.lineno
+            warnings.warn('no code context for debug call, code inspection impossible', RuntimeWarning)
+            arguments = list(self._args_inspection_failed(args, kwargs))
 
         return self.output_class(
             filename=filename,
             lineno=lineno,
             frame=call_frame.function,
-            arguments=list(self._process_args(call_lines, args, kwargs, call_frame))
+            arguments=arguments
         )
 
     def _args_inspection_failed(self, args, kwargs):
@@ -116,44 +118,12 @@ def _args_inspection_failed(self, args, kwargs):
         for name, value in kwargs.items():
             yield self.output_class.arg_class(value, name=name)
 
-    def _process_args(self, call_lines, args, kwargs, call_frame) -> Generator[DebugArgument, None, None]:  # noqa: C901
-        if not call_lines:
-            warnings.warn('no code context for debug call, code inspection impossible', RuntimeWarning)
-            yield from self._args_inspection_failed(args, kwargs)
-            return
-
-        code = dedent(''.join(call_lines))
-        # print(code)
-        try:
-            func_ast = ast.parse(code).body[0].value
-        except SyntaxError as e1:
-            # if the trailing bracket of the function is on a new line eg.
-            # debug(
-            #     foo, bar,
-            # )
-            # inspect ignores it with index and we have to add it back
-            code2 = code + call_frame.code_context[call_frame.index + 1]
-            try:
-                func_ast = ast.parse(code2).body[0].value
-            except SyntaxError:
-                warnings.warn('error passing code:\n"{}"\nError: {}'.format(code, e1), SyntaxWarning)
-                yield from self._args_inspection_failed(args, kwargs)
-                return
-            else:
-                code = code2
-
-        code_lines = [l for l in code.split('\n') if l]
-        # this removes the trailing bracket from the lines of code meaning it doesn't appear in the
-        # representation of the last argument
-        code_lines[-1] = code_lines[-1][:-1]
-
+    def _process_args(self, func_ast, code_lines, args, kwargs) -> Generator[DebugArgument, None, None]:  # noqa: C901
         arg_offsets = list(self._get_offsets(func_ast))
         for arg, ast_node, i in zip(args, func_ast.args, range(1000)):
             if isinstance(ast_node, ast.Name):
                 yield self.output_class.arg_class(arg, name=ast_node.id)
-            elif isinstance(ast_node, (ast.Str, ast.Bytes, ast.Num, ast.List, ast.Dict, ast.Set)):
-                yield self.output_class.arg_class(arg)
-            elif isinstance(ast_node, (ast.Call, ast.Compare)):
+            elif isinstance(ast_node, self.complex_nodes):
                 # TODO replace this hack with astor when it get's round to a new release
                 start_line, start_col = ast_node.lineno - 1, ast_node.col_offset
                 end_line, end_col = len(code_lines) - 1, None
@@ -170,7 +140,6 @@ def _process_args(self, call_lines, args, kwargs, call_frame) -> Generator[Debug
                     )
                 yield self.output_class.arg_class(arg, name=' '.join(name_lines).strip(' ,'))
             else:
-                warnings.warn('Unknown type: {}'.format(ast.dump(ast_node)), RuntimeWarning)
                 yield self.output_class.arg_class(arg)
 
         kw_arg_names = {}
@@ -180,6 +149,61 @@ def _process_args(self, call_lines, args, kwargs, call_frame) -> Generator[Debug
         for name, value in kwargs.items():
             yield self.output_class.arg_class(value, name=name, variable=kw_arg_names.get(name))
 
+    def _parse_code(self, call_frame, func_regex, filename) -> Tuple[Optional[ast.AST], Optional[List[str]], int]:
+        """
+        Find the source lines of the debug call in the frame's code context and parse them.
+
+        :param call_frame: frame info of the caller; its code_context, index and lineno are read
+        :param func_regex: regex which matches the start of the debug call
+        :param filename: passed to ast.parse so it appears in syntax error messages
+        :return: (func_ast, code_lines, lineno); func_ast and code_lines are None if parsing failed,
+            lineno is the line of the first source line collected for the call
+        """
+        call_lines = []
+        # search backwards from the line inspect reports for the start of the call; the range stops at -1
+        # (not 0) so the first line of the context is checked too, otherwise a call on the first line of a
+        # file gives no lines at all and ast.parse('').body[0] raises an IndexError
+        for line in range(call_frame.index, -1, -1):
+            new_line = call_frame.code_context[line]
+            call_lines.append(new_line)
+            if re.search(func_regex, new_line):
+                break
+        call_lines.reverse()
+        lineno = call_frame.lineno - len(call_lines) + 1
+
+        original_code = code = dedent(''.join(call_lines))
+        func_ast = None
+        tail_index = call_frame.index
+        try:
+            func_ast = ast.parse(code, filename=filename).body[0].value
+        except SyntaxError as e1:
+            # if the trailing bracket(s) of the function is/are on a new line e.g.
+            # debug(
+            #     foo, bar,
+            # )
+            # inspect ignores it when setting index and we have to add it back,
+            # try adding between 1 and 4 of the following lines until the code parses
+            for extra in range(2, 6):
+                extra_lines = call_frame.code_context[tail_index + 1:tail_index + extra]
+                code = dedent(''.join(call_lines + extra_lines))
+                try:
+                    func_ast = ast.parse(code, filename=filename).body[0].value
+                except SyntaxError:
+                    pass
+                else:
+                    break
+
+            if not func_ast:
+                # report the code as originally found, not the last extended attempt
+                warnings.warn('error passing code:\n"{}"\nError: {}'.format(original_code, e1), SyntaxWarning)
+                return None, None, lineno
+
+        code_lines = [l for l in code.split('\n') if l]
+        # this removes the trailing bracket from the lines of code meaning it doesn't appear in the
+        # representation of the last argument
+        code_lines[-1] = code_lines[-1][:-1]
+        return func_ast, code_lines, lineno
+
     @classmethod
     def _get_offsets(cls, func_ast):
         for arg in func_ast.args:
diff --git a/tests/test_expr_render.py b/tests/test_expr_render.py
@@ -15,19 +15,52 @@ def test_simple():
     v = debug.format(len(a))
     s = re.sub(':\d{2,}', ':<line no>', str(v))
     # print(s)
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_simple: len(a) = 3 (int)'
+    ) == s
+
+
+def test_exotic_types():
+    aa = [1, 2, 3]
+    v = debug.format(
+        sum(aa),
+        1 == 2,
+        1 < 2,
+        1 << 2,
+        't' if True else 'f',
+        1 or 2,
+        [a for a in aa],
+        {a for a in aa},
+        {a: a + 1 for a in aa},
+        (a for a in aa),
     )
+    s = re.sub(r':\d{2,}', ':<line no>', str(v))
+    s = re.sub(r'(at 0x)\w+', r'\1<hash>', s)
+    print(s)
+    # list and generator comprehensions are wrong because ast is wrong, see https://bugs.python.org/issue31241
+    assert (
+        'tests/test_expr_render.py:<line no> test_exotic_types\n'
+        '  sum(aa) = 6 (int)\n'
+        '  1 == 2 = False (bool)\n'
+        '  1 < 2 = True (bool)\n'
+        '  1 << 2 = 4 (int)\n'
+        '  \'t\' if True else \'f\' = "t" (str) len=1\n'
+        '  1 or 2, [ = 1 (int)\n'
+        '  a for a in aa] = [1, 2, 3] (list)\n'
+        '  {a for a in aa} = {1, 2, 3} (set)\n'
+        '  {a: a + 1 for a in aa}, ( = {1: 2, 2: 3, 3: 4} (dict)\n'
+        '  a for a in aa) = <generator object test_exotic_types.<locals>.<genexpr> at 0x<hash>> (generator)'
+    ) == s
 
 
 def test_newline():
     v = debug.format(
         foobar(1, 2, 3))
     s = re.sub(':\d{2,}', ':<line no>', str(v))
     # print(s)
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_newline: foobar(1, 2, 3) = 6 (int)'
-    )
+    ) == s
 
 
 def test_trailing_bracket():
@@ -36,9 +69,9 @@ def test_trailing_bracket():
     )
     s = re.sub(':\d{2,}', ':<line no>', str(v))
     # print(s)
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_trailing_bracket: foobar(1, 2, 3) = 6 (int)'
-    )
+    ) == s
 
 
 def test_multiline():
@@ -49,9 +82,9 @@ def test_multiline():
     )
     s = re.sub(':\d{2,}', ':<line no>', str(v))
     # print(s)
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_multiline: foobar(1, 2, 3) = 6 (int)'
-    )
+    ) == s
 
 
 def test_multiline_trailing_bracket():
@@ -60,9 +93,9 @@ def test_multiline_trailing_bracket():
                ))
     s = re.sub(':\d{2,}', ':<line no>', str(v))
     # print(s)
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_multiline_trailing_bracket: foobar(1, 2, 3 ) = 6 (int)'
-    )
+    ) == s
 
 
 @pytest.mark.skipif(sys.version_info < (3, 6), reason='kwarg order is not guaranteed for 3.5')
@@ -73,13 +106,12 @@ def test_kwargs():
         b=7
     )
     s = re.sub(':\d{2,}', ':<line no>', str(v))
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_kwargs\n'
         '  foobar(1, 2, 3) = 6 (int)\n'
         '  a = 6 (int)\n'
         '  b = 7 (int)'
-
-    )
+    ) == s
 
 
 @pytest.mark.skipif(sys.version_info < (3, 6), reason='kwarg order is not guaranteed for 3.5')
@@ -91,10 +123,47 @@ def test_kwargs_multiline():
         b=7
     )
     s = re.sub(':\d{2,}', ':<line no>', str(v))
-    assert s == (
+    assert (
         'tests/test_expr_render.py:<line no> test_kwargs_multiline\n'
         '  foobar(1, 2, 3) = 6 (int)\n'
         '  a = 6 (int)\n'
         '  b = 7 (int)'
+    ) == s
+
 
+def test_multiple_trailing_lines():
+    v = debug.format(
+        foobar(
+            1, 2, 3
+        ),
     )
+    s = re.sub(':\d{2,}', ':<line no>', str(v))
+    assert (
+        'tests/test_expr_render.py:<line no> test_multiple_trailing_lines: foobar( 1, 2, 3 ) = 6 (int)'
+    ) == s
+
+
+def test_syntax_warning():
+    # exceed the 4 extra lines which are normally checked
+    with pytest.warns(SyntaxWarning) as warning_checker:
+        v = debug.format(
+            abs(
+                abs(
+                    abs(
+                        abs(
+                            -1
+                        )
+                    )
+                )
+            )
+        )
+    assert len(warning_checker) == 1
+    warning = warning_checker.list[0]
+    print(warning.message)
+    assert 'Error: unexpected EOF while parsing (test_expr_render.py' in str(warning.message)
+    # check only the original code is included in the warning
+    assert '-1\n"' in str(warning.message)
+    s = re.sub(':\d{2,}', ':<line no>', str(v))
+    assert (
+        'tests/test_expr_render.py:<line no> test_syntax_warning: 1 (int)'
+    ) == s
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -77,6 +77,7 @@ def test_kwargs():
 
 
 def test_kwargs_orderless():
+    # for python3.5
     a = 'variable'
     v = debug.format(first=a, second='literal')
     s = re.sub(':\d{2,}', ':<line no>', str(v))
@@ -87,13 +88,40 @@ def test_kwargs_orderless():
     }
 
 
+def test_simple_vars():
+    v = debug.format('test', 1, 2)
+    s = re.sub(':\d{2,}', ':<line no>', str(v))
+    assert s == (
+        'tests/test_main.py:<line no> test_simple_vars\n'
+        '  "test" (str) len=4\n'
+        '  1 (int)\n'
+        '  2 (int)'
+    )
+    r = re.sub(':\d{2,}', ':<line no>', repr(v))
+    assert r == (
+        '<DebugOutput tests/test_main.py:<line no> test_simple_vars arguments: "test" (str) len=4 1 (int) 2 (int)>'
+    )
+
+
 def test_eval():
     with pytest.warns(RuntimeWarning):
         v = eval('debug.format(1)')
 
     assert str(v) == '<string>:1 <module>: 1 (int)'
 
 
+@pytest.mark.skipif(sys.version_info < (3, 6), reason='kwarg order is not guaranteed for 3.5')
+def test_eval_kwargs():
+    with pytest.warns(RuntimeWarning):
+        v = eval('debug.format(1, apple="pear")')
+
+    assert str(v) == (
+        '<string>:1 <module>\n'
+        '  1 (int)\n'
+        '  apple = "pear" (str) len=4'
+    )
+
+
 def test_exec(capsys):
     with pytest.warns(RuntimeWarning):
         exec(