Merge pull request github#3790 from RasmusWL/python-add-annotated-callgraph-tests

tausbn · web-flow · commit df3eb9f9c543 · 2020-07-10T15:38:38.000+02:00
Python: Add annotated call-graph tests
diff --git a/python/ql/test/experimental/library-tests/CallGraph-xfail/CallGraphTest.qll b/python/ql/test/experimental/library-tests/CallGraph-xfail/CallGraphTest.qll
@@ -0,0 +1 @@
+../CallGraph/CallGraphTest.qll
diff --git a/python/ql/test/experimental/library-tests/CallGraph-xfail/PointsTo.expected b/python/ql/test/experimental/library-tests/CallGraph-xfail/PointsTo.expected
@@ -0,0 +1,18 @@
+debug_missingAnnotationForCallable
+| annotation_xfail.py:10:1:10:24 | callable_not_annotated() | This call is annotated with 'callable_not_annotated', but no callable with that annotation was extracted. Please fix. |
+debug_nonUniqueAnnotationForCallable
+| annotation_xfail.py:13:1:13:17 | Function non_unique | Multiple callables are annotated with 'non_unique'. Please fix. |
+| annotation_xfail.py:17:1:17:26 | Function too_much_copy_paste | Multiple callables are annotated with 'non_unique'. Please fix. |
+debug_missingAnnotationForCall
+| annotation_xfail.py:2:1:2:24 | Function no_annotated_call | This callable is annotated with 'no_annotated_call', but no call with that annotation was extracted. Please fix. |
+expectedCallEdgeNotFound
+| call_edge_xfail.py:36:1:36:11 | xfail_foo() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar |
+| call_edge_xfail.py:39:1:39:11 | xfail_baz() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar |
+unexpectedCallEdgeFound
+| call_edge_xfail.py:29:1:29:6 | func() | call_edge_xfail.py:4:1:4:16 | Function xfail_foo | Call resolved to the callable named 'xfail_foo' but was not annotated as such |
+| call_edge_xfail.py:29:1:29:6 | func() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar | Call resolved to the callable named 'xfail_bar' but was not annotated as such |
+| call_edge_xfail.py:30:1:30:11 | xfail_foo() | call_edge_xfail.py:4:1:4:16 | Function xfail_foo | Call resolved to the callable named 'xfail_foo' but was not annotated as such |
+| call_edge_xfail.py:31:1:31:14 | xfail_lambda() | call_edge_xfail.py:15:16:15:44 | Function lambda | Call resolved to the callable named 'xfail_lambda' but was not annotated as such |
+| call_edge_xfail.py:36:1:36:11 | xfail_foo() | call_edge_xfail.py:4:1:4:16 | Function xfail_foo | Call resolved to the callable named 'xfail_foo' but was not annotated as such |
+| call_edge_xfail.py:39:1:39:11 | xfail_baz() | call_edge_xfail.py:11:1:11:16 | Function xfail_baz | Annotated call resolved to unannotated callable |
+| call_edge_xfail.py:43:1:43:6 | func() | call_edge_xfail.py:8:1:8:16 | Function xfail_bar | Call resolved to the callable named 'xfail_bar' but was not annotated as such |
diff --git a/python/ql/test/experimental/library-tests/CallGraph-xfail/PointsTo.ql b/python/ql/test/experimental/library-tests/CallGraph-xfail/PointsTo.ql
@@ -0,0 +1 @@
+../CallGraph/PointsTo.ql
diff --git a/python/ql/test/experimental/library-tests/CallGraph-xfail/README.md b/python/ql/test/experimental/library-tests/CallGraph-xfail/README.md
@@ -0,0 +1 @@
+Tests that show that our failure handling in [CallGraph](../CallGraph/) works as expected.
diff --git a/python/ql/test/experimental/library-tests/CallGraph-xfail/annotation_xfail.py b/python/ql/test/experimental/library-tests/CallGraph-xfail/annotation_xfail.py
@@ -0,0 +1,21 @@
+# name:no_annotated_call
+def no_annotated_call():
+    pass
+
+def callable_not_annotated():
+    pass
+
+no_annotated_call()
+# calls:callable_not_annotated
+callable_not_annotated()
+
+# name:non_unique
+def non_unique():
+    pass
+
+# name:non_unique
+def too_much_copy_paste():
+    pass
+
+# calls:non_unique
+non_unique()
diff --git a/python/ql/test/experimental/library-tests/CallGraph-xfail/call_edge_xfail.py b/python/ql/test/experimental/library-tests/CallGraph-xfail/call_edge_xfail.py
@@ -0,0 +1,43 @@
+import sys
+
+# name:xfail_foo
+def xfail_foo():
+    print('xfail_foo')
+
+# name:xfail_bar
+def xfail_bar():
+    print('xfail_bar')
+
+def xfail_baz():
+    print('xfail_baz')
+
+# name:xfail_lambda
+xfail_lambda = lambda: print('xfail_lambda')
+
+if len(sys.argv) >= 2 and not sys.argv[1] in ['0', 'False', 'false']:
+    func = xfail_foo
+else:
+    func = xfail_bar
+
+# Correct usage to suppress bad annotation errors
+# calls:xfail_foo calls:xfail_bar
+func()
+# calls:xfail_lambda
+xfail_lambda()
+
+# These are not annotated, and will give rise to unexpectedCallEdgeFound
+func()
+xfail_foo()
+xfail_lambda()
+
+# These are annotated wrongly, and will give rise to both expectedCallEdgeNotFound and unexpectedCallEdgeFound
+
+# calls:xfail_bar
+xfail_foo()
+
+# calls:xfail_bar
+xfail_baz()
+
+# The annotation is incomplete (does not include the call to xfail_bar)
+# calls:xfail_foo
+func()
diff --git a/python/ql/test/experimental/library-tests/CallGraph/CallGraphTest.qll b/python/ql/test/experimental/library-tests/CallGraph/CallGraphTest.qll
@@ -0,0 +1,147 @@
+import python
+
+/** Gets a comment that starts and ends on the line directly above the first line of `ast`, in the same file. */
+Comment commentFor(AstNode ast) {
+    exists(int line | line = ast.getLocation().getStartLine() - 1 | // `line` is the line just before `ast` starts
+        result
+                .getLocation()
+                .hasLocationInfo(ast.getLocation().getFile().getAbsolutePath(), line, _, line, _) // same file, any columns
+    )
+}
+
+/** Gets the `value` of a `tag:value` annotation in the comment on the line above `ast`; one comment may hold several annotations. */
+string getAnnotation(AstNode ast, string tag) {
+    exists(Comment comment, string match, string theRegex |
+        theRegex = "([\\w]+):([\\w.]+)" and // group 1: tag (word characters); group 2: value (word characters and dots)
+        comment = commentFor(ast) and
+        match = comment.getText().regexpFind(theRegex, _, _) and // any occurrence at any offset, so every pair in the comment is seen
+        tag = match.regexpCapture(theRegex, 1) and
+        result = match.regexpCapture(theRegex, 2)
+    )
+}
+
+/** Gets a function whose preceding comment line carries the annotation `name:<name>`. */
+Function annotatedCallable(string name) { name = getAnnotation(result, "name") }
+
+/** Gets a call whose preceding comment line carries the annotation `calls:<name>`. */
+Call annotatedCall(string name) { name = getAnnotation(result, "calls") }
+
+predicate missingAnnotationForCallable(string name, Call call) { // holds if `call` refers to a `name` that no function is annotated with
+    call = annotatedCall(name) and // `call` is annotated with `calls:<name>` ...
+    not exists(annotatedCallable(name)) // ... but no function is annotated with `name:<name>`
+}
+
+predicate nonUniqueAnnotationForCallable(string name, Function callable) { // holds for every function that shares its `name:<name>` annotation
+    strictcount(annotatedCallable(name)) > 1 and // more than one function is annotated with `name:<name>` ...
+    callable = annotatedCallable(name) // ... and `callable` is one of them
+}
+
+predicate missingAnnotationForCall(string name, Function callable) { // holds if `callable` is annotated with a `name` that no call refers to
+    not exists(annotatedCall(name)) and // no call is annotated with `calls:<name>` ...
+    callable = annotatedCallable(name) // ... although `callable` is annotated with `name:<name>`
+}
+
+/** Holds if the annotation `name` is unusable: it has a call but no callable, several callables, or a callable but no call. */
+predicate nameInErrorState(string name) {
+    missingAnnotationForCallable(name, _)
+    or
+    nonUniqueAnnotationForCallable(name, _)
+    or
+    missingAnnotationForCall(name, _)
+}
+
+/** Holds if the annotations state that `call` (annotated `calls:<name>`) calls `callable` (annotated `name:<name>`), and `name` is not in an error state. */
+predicate annotatedCallEdge(string name, Call call, Function callable) {
+    not nameInErrorState(name) and // names with broken annotations never produce edges
+    call = annotatedCall(name) and
+    callable = annotatedCallable(name)
+}
+
+// ------------------------- Annotation debug query predicates -------------------------
+query predicate debug_missingAnnotationForCallable(Call call, string message) { // reports each `calls:<name>` call that has no `name:<name>` function
+    exists(string name |
+        message =
+            "This call is annotated with '" + name +
+                "', but no callable with that annotation was extracted. Please fix." and
+        missingAnnotationForCallable(name, call)
+    )
+}
+
+query predicate debug_nonUniqueAnnotationForCallable(Function callable, string message) { // reports each function that shares its `name:<name>` with another
+    exists(string name |
+        message = "Multiple callables are annotated with '" + name + "'. Please fix." and
+        nonUniqueAnnotationForCallable(name, callable)
+    )
+}
+
+query predicate debug_missingAnnotationForCall(Function callable, string message) { // reports each `name:<name>` function that no `calls:<name>` call refers to
+    exists(string name |
+        message =
+            "This callable is annotated with '" + name +
+                "', but no call with that annotation was extracted. Please fix." and
+        missingAnnotationForCall(name, callable)
+    )
+}
+
+// ------------------------- Call Graph resolution -------------------------
+private newtype TCallGraphResolver = // one branch per call graph resolution method
+    TPointsToResolver() or // underlying value of `PointsToResolver`
+    TTypeTrackerResolver() // underlying value of `TypeTrackerResolver`
+
+/** A method of call graph resolution whose results are compared against the annotations in the source code. */
+abstract class CallGraphResolver extends TCallGraphResolver {
+    abstract predicate callEdge(Call call, Function callable); // holds if this resolver claims that `call` calls `callable`
+
+    /**
+     * Holds if annotations show that `call` will call `callable`,
+     * but this call graph resolver did not find that edge.
+     */
+    predicate expectedCallEdgeNotFound(Call call, Function callable) {
+        annotatedCallEdge(_, call, callable) and
+        not this.callEdge(call, callable)
+    }
+
+    /**
+     * Holds if no annotation shows that `call` will call `callable` (where at least one of the two is annotated),
+     * but this call graph resolver claims that `call` will call `callable`; `message` explains which case applies.
+     */
+    predicate unexpectedCallEdgeFound(Call call, Function callable, string message) {
+        this.callEdge(call, callable) and
+        not annotatedCallEdge(_, call, callable) and
+        (
+            exists(string name | // case 1: the callable is annotated, but this call does not list it
+                message = "Call resolved to the callable named '" + name + "' but was not annotated as such" and
+                callable = annotatedCallable(name) and
+                not nameInErrorState(name)
+            )
+            or
+            exists(string name | // case 2: the call is annotated, but the callable has no annotation at all
+                message = "Annotated call resolved to unannotated callable" and
+                call = annotatedCall(name) and
+                not nameInErrorState(name) and
+                not exists( | callable = annotatedCallable(_))
+            )
+        )
+    }
+
+    string toString() { result = "CallGraphResolver" }
+}
+
+/** A call graph resolver based on the existing points-to analysis. */
+class PointsToResolver extends CallGraphResolver, TPointsToResolver {
+    override predicate callEdge(Call call, Function callable) {
+        exists(PythonFunctionValue funcValue |
+            funcValue.getScope() = callable and // the function value's scope is `callable` ...
+            call = funcValue.getACall().getNode() // ... and `call` is the node of one of that value's calls
+        )
+    }
+
+    override string toString() { result = "PointsToResolver" }
+}
+
+/** A call graph resolver based on type trackers. */
+class TypeTrackerResolver extends CallGraphResolver, TTypeTrackerResolver {
+    override predicate callEdge(Call call, Function callable) { none() } // never holds: this resolver currently reports no call edges
+
+    override string toString() { result = "TypeTrackerResolver" }
+}
diff --git a/python/ql/test/experimental/library-tests/CallGraph/PointsTo.expected b/python/ql/test/experimental/library-tests/CallGraph/PointsTo.expected
@@ -0,0 +1,6 @@
+debug_missingAnnotationForCallable
+debug_nonUniqueAnnotationForCallable
+debug_missingAnnotationForCall
+expectedCallEdgeNotFound
+| code/underscore_prefix_func_name.py:16:5:16:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
+unexpectedCallEdgeFound
diff --git a/python/ql/test/experimental/library-tests/CallGraph/PointsTo.ql b/python/ql/test/experimental/library-tests/CallGraph/PointsTo.ql
@@ -0,0 +1,10 @@
+import python
+import CallGraphTest
+
+query predicate expectedCallEdgeNotFound(Call call, Function callable) { // annotated edges that the points-to resolver did not find
+    any(PointsToResolver r).expectedCallEdgeNotFound(call, callable)
+}
+
+query predicate unexpectedCallEdgeFound(Call call, Function callable, string message) { // points-to edges that the annotations do not back up
+    any(PointsToResolver r).unexpectedCallEdgeFound(call, callable, message)
+}
diff --git a/python/ql/test/experimental/library-tests/CallGraph/README.md b/python/ql/test/experimental/library-tests/CallGraph/README.md
@@ -0,0 +1,38 @@
+# Call Graph Tests
+
+A small testing framework for our call graph resolution. It relies on manual annotation of calls and callables, **and will only include output if something is wrong**. For example, if we are not able to resolve that the `foo()` call will call the `foo` function, that should give an alert.
+
+```py
+# name:foo
+def foo():
+    pass
+# calls:foo
+foo()
+```
+
+This is greatly inspired by [`CallGraphs/AnnotatedTest`](https://github.com/github/codeql/blob/696d19cb1440b6f6a75c6a2c1319e18860ceb436/javascript/ql/test/library-tests/CallGraphs/AnnotatedTest/Test.ql) from JavaScript.
+
+IMPORTANT: Names used in annotations are not scoped, so they must be globally unique (this is a bit annoying, but keeps things simple). If several callables are annotated with the same name, an error message will be output.
+
+Important files:
+
+- `CallGraphTest.qll`: main code that finds annotated calls/callables and sets everything up.
+- `PointsTo.ql`: results when using points-to for call graph resolution.
+- `TypeTracker.ql`: results when using TypeTracking for call graph resolution.
+- `Relative.ql`: differences between using points-to and TypeTracking.
+- `code/` contains the actual Python code we test against (included by `test.py`).
+
+All queries will also execute some `debug_*` predicates. These highlight any obvious problems with the annotation setup, and so there should never be any results committed. To show that this works as expected, see the [CallGraph-xfail](../CallGraph-xfail/) test, which uses symlinked versions of the files in this directory (it can't be included as a subdirectory, so it has to be a sibling).
+
+## `options` file
+
+If the value for `--max-import-depth` is set so that `import random` will extract `random.py` from the standard library, BUT NO transitive imports are extracted, then points-to analysis will fail to handle the following snippet.
+
+```py
+import random
+if random.random() < 0.5:
+    func = foo
+else:
+    func = bar
+func()
+```
diff --git a/python/ql/test/experimental/library-tests/CallGraph/Relative.expected b/python/ql/test/experimental/library-tests/CallGraph/Relative.expected
@@ -0,0 +1,20 @@
+debug_missingAnnotationForCallable
+debug_nonUniqueAnnotationForCallable
+debug_missingAnnotationForCall
+pointsTo_found_typeTracker_notFound
+| code/class_simple.py:28:1:28:15 | Attribute() | code/class_simple.py:8:5:8:26 | Function some_method |
+| code/class_simple.py:30:1:30:21 | Attribute() | code/class_simple.py:13:5:13:28 | Function some_staticmethod |
+| code/class_simple.py:32:1:32:20 | Attribute() | code/class_simple.py:18:5:18:30 | Function some_classmethod |
+| code/class_simple.py:35:1:35:21 | Attribute() | code/class_simple.py:13:5:13:28 | Function some_staticmethod |
+| code/class_simple.py:37:1:37:20 | Attribute() | code/class_simple.py:18:5:18:30 | Function some_classmethod |
+| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
+| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
+| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
+| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
+| code/simple.py:19:1:19:5 | foo() | code/simple.py:2:1:2:10 | Function foo |
+| code/simple.py:21:1:21:14 | indirect_foo() | code/simple.py:2:1:2:10 | Function foo |
+| code/simple.py:23:1:23:5 | bar() | code/simple.py:10:1:10:10 | Function bar |
+| code/simple.py:25:1:25:5 | lam() | code/simple.py:15:7:15:36 | Function lambda |
+| code/underscore_prefix_func_name.py:21:5:21:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
+| code/underscore_prefix_func_name.py:25:5:25:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
+pointsTo_notFound_typeTracker_found
diff --git a/python/ql/test/experimental/library-tests/CallGraph/Relative.ql b/python/ql/test/experimental/library-tests/CallGraph/Relative.ql
@@ -0,0 +1,15 @@
+import python
+
+import CallGraphTest
+
+query predicate pointsTo_found_typeTracker_notFound(Call call, Function callable) {
+    annotatedCallEdge(_, call, callable) and // only edges backed by annotations are compared
+    any(PointsToResolver r).callEdge(call, callable) and // points-to finds the edge ...
+    not any(TypeTrackerResolver r).callEdge(call, callable) // ... but the type-tracker resolver does not
+}
+
+query predicate pointsTo_notFound_typeTracker_found(Call call, Function callable) {
+    annotatedCallEdge(_, call, callable) and // only edges backed by annotations are compared
+    not any(PointsToResolver r).callEdge(call, callable) and // points-to misses the edge ...
+    any(TypeTrackerResolver r).callEdge(call, callable) // ... but the type-tracker resolver finds it
+}
diff --git a/python/ql/test/experimental/library-tests/CallGraph/TypeTracker.expected b/python/ql/test/experimental/library-tests/CallGraph/TypeTracker.expected
@@ -0,0 +1,21 @@
+debug_missingAnnotationForCallable
+debug_nonUniqueAnnotationForCallable
+debug_missingAnnotationForCall
+expectedCallEdgeNotFound
+| code/class_simple.py:28:1:28:15 | Attribute() | code/class_simple.py:8:5:8:26 | Function some_method |
+| code/class_simple.py:30:1:30:21 | Attribute() | code/class_simple.py:13:5:13:28 | Function some_staticmethod |
+| code/class_simple.py:32:1:32:20 | Attribute() | code/class_simple.py:18:5:18:30 | Function some_classmethod |
+| code/class_simple.py:35:1:35:21 | Attribute() | code/class_simple.py:13:5:13:28 | Function some_staticmethod |
+| code/class_simple.py:37:1:37:20 | Attribute() | code/class_simple.py:18:5:18:30 | Function some_classmethod |
+| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
+| code/runtime_decision.py:21:1:21:6 | func() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
+| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:8:1:8:13 | Function rd_foo |
+| code/runtime_decision.py:30:1:30:7 | func2() | code/runtime_decision.py:12:1:12:13 | Function rd_bar |
+| code/simple.py:19:1:19:5 | foo() | code/simple.py:2:1:2:10 | Function foo |
+| code/simple.py:21:1:21:14 | indirect_foo() | code/simple.py:2:1:2:10 | Function foo |
+| code/simple.py:23:1:23:5 | bar() | code/simple.py:10:1:10:10 | Function bar |
+| code/simple.py:25:1:25:5 | lam() | code/simple.py:15:7:15:36 | Function lambda |
+| code/underscore_prefix_func_name.py:16:5:16:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
+| code/underscore_prefix_func_name.py:21:5:21:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
+| code/underscore_prefix_func_name.py:25:5:25:19 | some_function() | code/underscore_prefix_func_name.py:10:1:10:20 | Function some_function |
+unexpectedCallEdgeFound
diff --git a/python/ql/test/experimental/library-tests/CallGraph/TypeTracker.ql b/python/ql/test/experimental/library-tests/CallGraph/TypeTracker.ql
@@ -0,0 +1,10 @@
+import python
+import CallGraphTest
+
+query predicate expectedCallEdgeNotFound(Call call, Function callable) { // annotated edges that the type-tracker resolver did not find
+    any(TypeTrackerResolver r).expectedCallEdgeNotFound(call, callable)
+}
+
+query predicate unexpectedCallEdgeFound(Call call, Function callable, string message) { // type-tracker edges that the annotations do not back up
+    any(TypeTrackerResolver r).unexpectedCallEdgeFound(call, callable, message)
+}
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_advanced.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_advanced.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_simple.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_simple.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py b/python/ql/test/experimental/library-tests/CallGraph/code/runtime_decision.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/simple.py b/python/ql/test/experimental/library-tests/CallGraph/code/simple.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/underscore_prefix_func_name.py b/python/ql/test/experimental/library-tests/CallGraph/code/underscore_prefix_func_name.py
diff --git a/python/ql/test/experimental/library-tests/CallGraph/options b/python/ql/test/experimental/library-tests/CallGraph/options
diff --git a/python/ql/test/experimental/library-tests/CallGraph/test.py b/python/ql/test/experimental/library-tests/CallGraph/test.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Tests that show that our failure handling in [CallGraph](../CallGraph/) works as expected.`