diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll index 11bebd975f0b..ffc258d6c404 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -149,11 +149,16 @@ private newtype TDefOrUseImpl = private predicate isGlobalUse( GlobalLikeVariable v, IRFunction f, int indirection, int indirectionIndex ) { - exists(VariableAddressInstruction vai | - vai.getEnclosingIRFunction() = f and - vai.getAstVariable() = v and - isDef(_, _, _, vai, indirection, indirectionIndex) - ) + // Generate a "global use" at the end of the function body if there's a + // direct definition somewhere in the body of the function + indirection = + min(int cand, VariableAddressInstruction vai | + vai.getEnclosingIRFunction() = f and + vai.getAstVariable() = v and + isDef(_, _, _, vai, cand, indirectionIndex) + | + cand + ) } private predicate isGlobalDefImpl( @@ -447,6 +452,57 @@ class FinalParameterUse extends UseImpl, TFinalParameterUse { } } +/** + * A use that models a synthetic "last use" of a global variable just before a + * function returns. + * + * We model global variable flow by: + * - Inserting a last use of any global variable that's modified by a function + * - Flowing from the last use to the `VariableNode` that represents the global + * variable. + * - Flowing from the `VariableNode` to an "initial def" of the global variable + * in any function that may read the global variable. + * - Flowing from the initial definition to any subsequent uses of the global + * variable in the function body. + * + * For example, consider the following pair of functions: + * ```cpp + * int global; + * int source(); + * void sink(int); + * + * void set_global() { + * global = source(); + * } + * + * void read_global() { + * sink(global); + * } + * ``` + * we insert global uses and defs so that (from the point-of-view of dataflow) + * the above scenario looks like: + * ```cpp + * int global; // (1) + * int source(); + * void sink(int); + * + * void set_global() { + * global = source(); + * __global_use(global); // (2) + * } + * + * void read_global() { + * global = __global_def; // (3) + * sink(global); // (4) + * } + * ``` + * and flow from `source()` to the argument of `sink` is then modeled as + * follows: + * 1. Flow from `source()` to `(2)` (via SSA). + * 2. Flow from `(2)` to `(1)` (via a `jumpStep`). + * 3. Flow from `(1)` to `(3)` (via a `jumpStep`). + * 4. Flow from `(3)` to `(4)` (via SSA). + */ class GlobalUse extends UseImpl, TGlobalUse { GlobalLikeVariable global; IRFunction f; @@ -494,6 +550,12 @@ class GlobalUse extends UseImpl, TGlobalUse { override BaseSourceVariableInstruction getBase() { none() } } +/** + * A definition that models a synthetic "initial definition" of a global + * variable just after the function entry point. + * + * See the QLDoc for `GlobalUse` for how this is used. + */ class GlobalDefImpl extends DefOrUseImpl, TGlobalDefImpl { GlobalLikeVariable global; IRFunction f; diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected index 441bef2cddd5..61b1a97ce45b 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected @@ -159,6 +159,9 @@ postWithInFlow | test.cpp:808:5:808:21 | * ... [post update] | PostUpdateNode should not be the target of local flow. | | test.cpp:808:6:808:21 | global_indirect1 [inner post update] | PostUpdateNode should not be the target of local flow. | | test.cpp:832:5:832:17 | global_direct [post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:931:5:931:18 | global_pointer [post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:932:5:932:19 | * ... [post update] | PostUpdateNode should not be the target of local flow. | +| test.cpp:932:6:932:19 | global_pointer [inner post update] | PostUpdateNode should not be the target of local flow. | viableImplInCallContextTooLarge uniqueParameterNodeAtPosition uniqueParameterNodePosition diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected index 946878eb56ca..3eb7d66fd0e6 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected @@ -300,6 +300,7 @@ irFlow | test.cpp:902:56:902:75 | *indirect_source(2) | test.cpp:911:19:911:48 | *global_array_static_indirect_2 | | test.cpp:914:46:914:53 | source | test.cpp:919:10:919:30 | global_pointer_static | | test.cpp:915:57:915:76 | *indirect_source(1) | test.cpp:921:19:921:50 | *global_pointer_static_indirect_1 | +| test.cpp:932:23:932:28 | call to source | test.cpp:937:10:937:24 | * ... | | true_upon_entry.cpp:9:11:9:16 | call to source | true_upon_entry.cpp:13:8:13:8 | x | | true_upon_entry.cpp:17:11:17:16 | call to source | true_upon_entry.cpp:21:8:21:8 | x | | true_upon_entry.cpp:27:9:27:14 | call to source | true_upon_entry.cpp:29:8:29:8 | x | diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp index b5883963620d..0a16f4606cd9 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp @@ -922,4 +922,18 @@ namespace GlobalArrays { sink(global_pointer_static_indirect_2); // clean: global_pointer_static_indirect_2 does not have 2 indirections indirect_sink(global_pointer_static_indirect_2); // clean: global_pointer_static_indirect_2 does not have 2 indirections } +} + +namespace global_variable_conflation_test { + int* global_pointer; + + void def() { + global_pointer = nullptr; + *global_pointer = source(); + } + + void use() { + sink(global_pointer); // clean + sink(*global_pointer); // $ ir MISSING: ast + } } \ No newline at end of file