fix: Fix python analysis for function docstrings (#267)

giovanni-guidini · web-flow · commit 01986b6dafa1 · 2023-09-27T11:44:06.000-03:00
* fix: Fix python analysis for function docstrings context: https://l.codecov.dev/hfGkNI This is a fix for a bug that I stumbled upon today. After much investigation it boils down to this: - treesitter count's docstrings as strings, not comments (which is to be expected I suppose) - pytest counts them as comments. They won't appear in the coverage report (if function docstring) - if you select a docstring as the line of surety ancestorship, because it's not in the report, it has no labels associated with it, so ATS selects no tests for that line We were doing that (I believe) and selected a function docstring as the line in base to look for tests. No tests were found, and so ATS passed, but all tests failed. These changes fix this behavior by doing 2 things: 1. Making sure function docstrings are not in `statements` on the static analysis 2. Making sure we don't select function docstrings as the line of surety ancestorship for the actual fist line of code in a funciton. It should be `null` instead. * addressing style comments
diff --git a/codecov_cli/services/staticanalysis/analyzers/python/node_wrappers.py b/codecov_cli/services/staticanalysis/analyzers/python/node_wrappers.py
@@ -10,6 +10,43 @@ def visit(self, node):
         for c in node.children:
             self.visit(c)
 
+    def _is_function_docstring(self, node):
+        """Skips docstrings for funtions, such as this one.
+        Pytest doesn't include them in the report, so I don't think we should either,
+        at least for now.
+        """
+        # Docstrings have type 'expression_statement
+        if node.type != "expression_statement":
+            return False
+        # Docstrings for a module are OK - they show up in pytest result
+        # Docstrings for a class are OK - they show up in pytest result
+        # Docstrings for functions are NOT OK - they DONT show up in pytest result
+        # Check if it's docstring
+        has_single_child = len(node.children) == 1
+        only_child_is_string = node.children[0].type == "string"
+        # Check if is the first line of a function
+        parent_is_block = node.parent.type == "block"
+        first_exp_in_block = node.prev_named_sibling is None
+        is_in_function_context = (
+            parent_is_block and node.parent.parent.type == "function_definition"
+        )
+
+        return (
+            has_single_child
+            and only_child_is_string
+            and parent_is_block
+            and first_exp_in_block
+            and is_in_function_context
+        )
+
+    def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node):
+        curr = node.prev_named_sibling
+        while curr is not None and (
+            curr.type == "comment" or self._is_function_docstring(curr)
+        ):
+            curr = curr.prev_named_sibling
+        return curr
+
     def do_visit(self, node):
         if node.is_named:
             current_line_number = node.start_point[0] + 1
@@ -20,9 +57,20 @@ def do_visit(self, node):
                 "for_statement",
                 "while_statement",
             ):
-                if node.prev_named_sibling:
+                if self._is_function_docstring(node):
+                    # We ignore these
+                    return
+                closest_named_sibling_not_comment_that_is_in_statements = (
+                    self._get_previous_sibling_that_is_not_comment_not_func_docstring(
+                        node
+                    )
+                )
+                if closest_named_sibling_not_comment_that_is_in_statements:
                     self.analyzer.line_surety_ancestorship[current_line_number] = (
-                        node.prev_named_sibling.start_point[0] + 1
+                        closest_named_sibling_not_comment_that_is_in_statements.start_point[
+                            0
+                        ]
+                        + 1
                     )
                 self.analyzer.statements.append(
                     {
diff --git a/samples/inputs/sample_005.py b/samples/inputs/sample_005.py
@@ -0,0 +1,35 @@
+"""An important module with some documentation on top.
+   "When I created this code only me and God knew what it does. Now only God knows"
+"""
+
+# Random comment
+# Random comment
+# Random comment
+
+x = "some string"
+y = 'some string'
+w = """some string"""
+
+def well_documented_function(a, b):
+    """ Returns the value of a + b, a - b and a * b
+        As a tuple, in that order
+    """
+    plus = a + b
+    minus = a - b
+    times = a * b
+    return (plus, minus, times)
+
+def less_documented_function(a, b):
+    """ Returns tuple(a + b, a - b, a * b)"""
+    return (a + b, a - b, a * b)
+
+def commented_function(a, b):
+    # Returns tuple(a + b, a - b, a * b)
+    return (a + b, a - b, a * b)
+
+class MyClass:
+    """ This is my class, not yours u.u
+    """
+
+    def __init__(self) -> None:
+        self.owner = 'me'
diff --git a/samples/outputs/sample_005.json b/samples/outputs/sample_005.json
@@ -0,0 +1,212 @@
+{
+    "result": {
+        "language": "python",
+        "empty_lines": [
+            4,
+            8,
+            12,
+            21,
+            25,
+            29,
+            33
+        ],
+        "functions": [
+            {
+                "identifier": "well_documented_function",
+                "start_line": 13,
+                "end_line": 20,
+                "code_hash": "a3dd128ed3a7f6b87dff831df9536041",
+                "complexity_metrics": {
+                    "conditions": 0,
+                    "mccabe_cyclomatic_complexity": 1,
+                    "returns": 1,
+                    "max_nested_conditional": 0
+                }
+            },
+            {
+                "identifier": "less_documented_function",
+                "start_line": 22,
+                "end_line": 24,
+                "code_hash": "5b39073ac5cd6ae87194ea607c355836",
+                "complexity_metrics": {
+                    "conditions": 0,
+                    "mccabe_cyclomatic_complexity": 1,
+                    "returns": 1,
+                    "max_nested_conditional": 0
+                }
+            },
+            {
+                "identifier": "commented_function",
+                "start_line": 26,
+                "end_line": 28,
+                "code_hash": "26a5e6a370792d61352f402e98066508",
+                "complexity_metrics": {
+                    "conditions": 0,
+                    "mccabe_cyclomatic_complexity": 1,
+                    "returns": 1,
+                    "max_nested_conditional": 0
+                }
+            },
+            {
+                "identifier": "MyClass::__init__",
+                "start_line": 34,
+                "end_line": 35,
+                "code_hash": "da07aae9b4c31039150d1eebbf835f4b",
+                "complexity_metrics": {
+                    "conditions": 0,
+                    "mccabe_cyclomatic_complexity": 1,
+                    "returns": 0,
+                    "max_nested_conditional": 0
+                }
+            }
+        ],
+        "hash": "0623dd2b794a6f7399865865b127a7ee",
+        "number_lines": 35,
+        "statements": [
+            [
+                1,
+                {
+                    "line_surety_ancestorship": null,
+                    "start_column": 0,
+                    "line_hash": "b6ac9155701928c76d7de0f40c023b2a",
+                    "len": 2,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                9,
+                {
+                    "line_surety_ancestorship": 1,
+                    "start_column": 0,
+                    "line_hash": "968a672046ffebf7efa7d9298b53223b",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                10,
+                {
+                    "line_surety_ancestorship": 9,
+                    "start_column": 0,
+                    "line_hash": "7e2ea08683844b89b362b9b920aeb1f4",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                11,
+                {
+                    "line_surety_ancestorship": 10,
+                    "start_column": 0,
+                    "line_hash": "72bcafea9cfcf4995233dee3004c5830",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                17,
+                {
+                    "line_surety_ancestorship": null,
+                    "start_column": 4,
+                    "line_hash": "9237152e7a4e9da3a61c59a992ee313b",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                18,
+                {
+                    "line_surety_ancestorship": 17,
+                    "start_column": 4,
+                    "line_hash": "78fd33f477009fb9dec82c435b6049fd",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                19,
+                {
+                    "line_surety_ancestorship": 18,
+                    "start_column": 4,
+                    "line_hash": "3a8f5fe6d0cd2995e78a189e004c46cb",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                20,
+                {
+                    "line_surety_ancestorship": 19,
+                    "start_column": 4,
+                    "line_hash": "1de7b03608e8c92e83a094f044863159",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                24,
+                {
+                    "line_surety_ancestorship": null,
+                    "start_column": 4,
+                    "line_hash": "26a5e6a370792d61352f402e98066508",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                28,
+                {
+                    "line_surety_ancestorship": null,
+                    "start_column": 4,
+                    "line_hash": "26a5e6a370792d61352f402e98066508",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                31,
+                {
+                    "line_surety_ancestorship": null,
+                    "start_column": 4,
+                    "line_hash": "9185d2f3d96e27b9446aa4361a60c996",
+                    "len": 1,
+                    "extra_connected_lines": []
+                }
+            ],
+            [
+                35,
+                {
+                    "line_surety_ancestorship": null,
+                    "start_column": 8,
+                    "line_hash": "da07aae9b4c31039150d1eebbf835f4b",
+                    "len": 0,
+                    "extra_connected_lines": []
+                }
+            ]
+        ],
+        "definition_lines": [
+            [
+                13,
+                7
+            ],
+            [
+                22,
+                2
+            ],
+            [
+                26,
+                2
+            ],
+            [
+                30,
+                5
+            ],
+            [
+                34,
+                1
+            ]
+        ],
+        "import_lines": []
+    },
+    "error": null
+}
diff --git a/tests/services/static_analysis/test_analyse_file.py b/tests/services/static_analysis/test_analyse_file.py
@@ -18,6 +18,7 @@
         ("samples/inputs/sample_002.py", "samples/outputs/sample_002.json"),
         ("samples/inputs/sample_003.js", "samples/outputs/sample_003.json"),
         ("samples/inputs/sample_004.js", "samples/outputs/sample_004.json"),
+        ("samples/inputs/sample_005.py", "samples/outputs/sample_005.json"),
     ],
 )
 def test_sample_analysis(input_filename, output_filename):

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@`
`18`	`18`	`("samples/inputs/sample_002.py", "samples/outputs/sample_002.json"),`
`19`	`19`	`("samples/inputs/sample_003.js", "samples/outputs/sample_003.json"),`
`20`	`20`	`("samples/inputs/sample_004.js", "samples/outputs/sample_004.json"),`
	`21`	`+ ("samples/inputs/sample_005.py", "samples/outputs/sample_005.json"),`
`21`	`22`	`],`
`22`	`23`	`)`
`23`	`24`	`def test_sample_analysis(input_filename, output_filename):`