Skip to content

Commit 5c7f2ac

Browse files
authored
Merge pull request github#12186 from aschackmull/dataflow/refactor-configuration
Data flow: Refactor configuration
2 parents 3538cf8 + 557cb17 commit 5c7f2ac

File tree

156 files changed

+43930
-181673
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

156 files changed

+43930
-181673
lines changed

config/identical-files.json

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,67 @@
11
{
22
"DataFlow Java/C++/C#/Go/Python/Ruby/Swift": [
3+
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlow.qll",
4+
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlow.qll",
5+
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlow.qll",
6+
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlow.qll",
7+
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlow.qll",
8+
"go/ql/lib/semmle/go/dataflow/internal/DataFlow.qll",
9+
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlow.qll",
10+
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlow.qll",
11+
"swift/ql/lib/codeql/swift/dataflow/internal/DataFlow.qll"
12+
],
13+
"DataFlowImpl Java/C++/C#/Go/Python/Ruby/Swift": [
314
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl.qll",
15+
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll",
16+
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll",
17+
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll",
18+
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl.qll",
19+
"go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll",
20+
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll",
21+
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll",
22+
"swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImpl.qll"
23+
],
24+
"DataFlow Java/C++/C#/Go/Python/Ruby/Swift Legacy Configuration": [
25+
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl1.qll",
426
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl2.qll",
527
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl3.qll",
628
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl4.qll",
729
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl5.qll",
830
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImpl6.qll",
931
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplForSerializability.qll",
1032
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplForOnActivityResult.qll",
11-
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll",
33+
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl1.qll",
1234
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll",
1335
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll",
1436
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll",
1537
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll",
16-
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll",
38+
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl1.qll",
1739
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll",
1840
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll",
1941
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll",
20-
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll",
42+
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl1.qll",
2143
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll",
2244
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll",
2345
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll",
24-
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl.qll",
46+
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl1.qll",
2547
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl2.qll",
2648
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl3.qll",
2749
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl4.qll",
2850
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImpl5.qll",
2951
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowImplForContentDataFlow.qll",
30-
"go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll",
52+
"go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl1.qll",
3153
"go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll",
3254
"go/ql/lib/semmle/go/dataflow/internal/DataFlowImplForStringsNewReplacer.qll",
33-
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl.qll",
55+
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl1.qll",
3456
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl2.qll",
3557
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl3.qll",
3658
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImpl4.qll",
3759
"python/ql/lib/semmle/python/dataflow/new/internal/DataFlowImplForRegExp.qll",
38-
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl.qll",
60+
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl1.qll",
3961
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImpl2.qll",
4062
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForHttpClientLibraries.qll",
4163
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplForPathname.qll",
42-
"swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImpl.qll"
64+
"swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImpl1.qll"
4365
],
4466
"DataFlow Java/C++/C#/Go/Python/Ruby/Swift Common": [
4567
"java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll",
@@ -52,7 +74,18 @@
5274
"ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowImplCommon.qll",
5375
"swift/ql/lib/codeql/swift/dataflow/internal/DataFlowImplCommon.qll"
5476
],
55-
"TaintTracking::Configuration Java/C++/C#/Go/Python/Ruby/Swift": [
77+
"TaintTracking Java/C++/C#/Go/Python/Ruby/Swift": [
78+
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/tainttracking1/TaintTracking.qll",
79+
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTracking.qll",
80+
"cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTracking.qll",
81+
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/tainttracking1/TaintTracking.qll",
82+
"go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTracking.qll",
83+
"java/ql/lib/semmle/code/java/dataflow/internal/tainttracking1/TaintTracking.qll",
84+
"python/ql/lib/semmle/python/dataflow/new/internal/tainttracking1/TaintTracking.qll",
85+
"ruby/ql/lib/codeql/ruby/dataflow/internal/tainttracking1/TaintTracking.qll",
86+
"swift/ql/lib/codeql/swift/dataflow/internal/tainttracking1/TaintTracking.qll"
87+
],
88+
"TaintTracking Legacy Configuration Java/C++/C#/Go/Python/Ruby/Swift": [
5689
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
5790
"cpp/ql/lib/semmle/code/cpp/dataflow/internal/tainttracking2/TaintTrackingImpl.qll",
5891
"cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
category: majorAnalysis
3+
---
4+
* The main data flow and taint tracking APIs have been changed. The old APIs
5+
remain in place for now and are translated to the new APIs through a
6+
backwards-compatible wrapper. If multiple configurations are in scope
7+
simultaneously, then this may affect results slightly. The new API is quite
8+
similar to the old, but makes use of a configuration module instead of a
9+
configuration class.

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow.qll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,6 @@
2222
import cpp
2323

2424
module DataFlow {
25-
import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImpl
25+
import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlow
26+
import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImpl1
2627
}

cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,6 @@ import semmle.code.cpp.ir.dataflow.DataFlow
1919
import semmle.code.cpp.ir.dataflow.DataFlow2
2020

2121
module TaintTracking {
22+
import experimental.semmle.code.cpp.ir.dataflow.internal.tainttracking1.TaintTracking
2223
import experimental.semmle.code.cpp.ir.dataflow.internal.tainttracking1.TaintTrackingImpl
2324
}
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
/**
2+
* Provides an implementation of global (interprocedural) data flow. This file
3+
* re-exports the local (intraprocedural) data flow analysis from
4+
* `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
5+
* through the `Make` and `MakeWithState` modules.
6+
*/
7+
8+
private import DataFlowImplCommon
9+
private import DataFlowImplSpecific::Private
10+
import DataFlowImplSpecific::Public
11+
import DataFlowImplCommonPublic
12+
private import DataFlowImpl
13+
14+
/** An input configuration for data flow. */
15+
signature module ConfigSig {
16+
/**
17+
* Holds if `source` is a relevant data flow source.
18+
*/
19+
predicate isSource(Node source);
20+
21+
/**
22+
* Holds if `sink` is a relevant data flow sink.
23+
*/
24+
predicate isSink(Node sink);
25+
26+
/**
27+
* Holds if data flow through `node` is prohibited. This completely removes
28+
* `node` from the data flow graph.
29+
*/
30+
default predicate isBarrier(Node node) { none() }
31+
32+
/** Holds if data flow into `node` is prohibited. */
33+
default predicate isBarrierIn(Node node) { none() }
34+
35+
/** Holds if data flow out of `node` is prohibited. */
36+
default predicate isBarrierOut(Node node) { none() }
37+
38+
/**
39+
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
40+
*/
41+
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
42+
43+
/**
44+
* Holds if an arbitrary number of implicit read steps of content `c` may be
45+
* taken at `node`.
46+
*/
47+
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
48+
49+
/**
50+
* Gets the virtual dispatch branching limit when calculating field flow.
51+
* This can be overridden to a smaller value to improve performance (a
52+
* value of 0 disables field flow), or a larger value to get more results.
53+
*/
54+
default int fieldFlowBranchLimit() { result = 2 }
55+
56+
/**
57+
* Gets a data flow configuration feature to add restrictions to the set of
58+
* valid flow paths.
59+
*
60+
* - `FeatureHasSourceCallContext`:
61+
* Assume that sources have some existing call context to disallow
62+
* conflicting return-flow directly following the source.
63+
* - `FeatureHasSinkCallContext`:
64+
* Assume that sinks have some existing call context to disallow
65+
* conflicting argument-to-parameter flow directly preceding the sink.
66+
* - `FeatureEqualSourceSinkCallContext`:
67+
* Implies both of the above and additionally ensures that the entire flow
68+
* path preserves the call context.
69+
*
70+
* These features are generally not relevant for typical end-to-end data flow
71+
* queries, and should only be used for constructing paths that need to
72+
* somehow be pluggable in another path context.
73+
*/
74+
default FlowFeature getAFeature() { none() }
75+
76+
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
77+
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
78+
79+
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
80+
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
81+
82+
/**
83+
* Holds if hidden nodes should be included in the data flow graph.
84+
*
85+
* This feature should only be used for debugging or when the data flow graph
86+
* is not visualized (as it is in a `path-problem` query).
87+
*/
88+
default predicate includeHiddenNodes() { none() }
89+
}
90+
91+
/** An input configuration for data flow using flow state. */
92+
signature module StateConfigSig {
93+
bindingset[this]
94+
class FlowState;
95+
96+
/**
97+
* Holds if `source` is a relevant data flow source with the given initial
98+
* `state`.
99+
*/
100+
predicate isSource(Node source, FlowState state);
101+
102+
/**
103+
* Holds if `sink` is a relevant data flow sink accepting `state`.
104+
*/
105+
predicate isSink(Node sink, FlowState state);
106+
107+
/**
108+
* Holds if data flow through `node` is prohibited. This completely removes
109+
* `node` from the data flow graph.
110+
*/
111+
default predicate isBarrier(Node node) { none() }
112+
113+
/**
114+
* Holds if data flow through `node` is prohibited when the flow state is
115+
* `state`.
116+
*/
117+
predicate isBarrier(Node node, FlowState state);
118+
119+
/** Holds if data flow into `node` is prohibited. */
120+
default predicate isBarrierIn(Node node) { none() }
121+
122+
/** Holds if data flow out of `node` is prohibited. */
123+
default predicate isBarrierOut(Node node) { none() }
124+
125+
/**
126+
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
127+
*/
128+
default predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
129+
130+
/**
131+
* Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps.
132+
* This step is only applicable in `state1` and updates the flow state to `state2`.
133+
*/
134+
predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2);
135+
136+
/**
137+
* Holds if an arbitrary number of implicit read steps of content `c` may be
138+
* taken at `node`.
139+
*/
140+
default predicate allowImplicitRead(Node node, ContentSet c) { none() }
141+
142+
/**
143+
* Gets the virtual dispatch branching limit when calculating field flow.
144+
* This can be overridden to a smaller value to improve performance (a
145+
* value of 0 disables field flow), or a larger value to get more results.
146+
*/
147+
default int fieldFlowBranchLimit() { result = 2 }
148+
149+
/**
150+
* Gets a data flow configuration feature to add restrictions to the set of
151+
* valid flow paths.
152+
*
153+
* - `FeatureHasSourceCallContext`:
154+
* Assume that sources have some existing call context to disallow
155+
* conflicting return-flow directly following the source.
156+
* - `FeatureHasSinkCallContext`:
157+
* Assume that sinks have some existing call context to disallow
158+
* conflicting argument-to-parameter flow directly preceding the sink.
159+
* - `FeatureEqualSourceSinkCallContext`:
160+
* Implies both of the above and additionally ensures that the entire flow
161+
* path preserves the call context.
162+
*
163+
* These features are generally not relevant for typical end-to-end data flow
164+
* queries, and should only be used for constructing paths that need to
165+
* somehow be pluggable in another path context.
166+
*/
167+
default FlowFeature getAFeature() { none() }
168+
169+
/** Holds if sources should be grouped in the result of `hasFlowPath`. */
170+
default predicate sourceGrouping(Node source, string sourceGroup) { none() }
171+
172+
/** Holds if sinks should be grouped in the result of `hasFlowPath`. */
173+
default predicate sinkGrouping(Node sink, string sinkGroup) { none() }
174+
175+
/**
176+
* Holds if hidden nodes should be included in the data flow graph.
177+
*
178+
* This feature should only be used for debugging or when the data flow graph
179+
* is not visualized (as it is in a `path-problem` query).
180+
*/
181+
default predicate includeHiddenNodes() { none() }
182+
}
183+
184+
/**
185+
* Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
186+
* measured in the approximate number of interprocedural steps.
187+
*/
188+
signature int explorationLimitSig();
189+
190+
/**
191+
* The output of a data flow computation.
192+
*/
193+
signature module DataFlowSig {
194+
/**
195+
* A `Node` augmented with a call context (except for sinks) and an access path.
196+
* Only those `PathNode`s that are reachable from a source, and which can reach a sink, are generated.
197+
*/
198+
class PathNode;
199+
200+
/**
201+
* Holds if data can flow from `source` to `sink`.
202+
*
203+
* The corresponding paths are generated from the end-points and the graph
204+
* included in the module `PathGraph`.
205+
*/
206+
predicate hasFlowPath(PathNode source, PathNode sink);
207+
208+
/**
209+
* Holds if data can flow from `source` to `sink`.
210+
*/
211+
predicate hasFlow(Node source, Node sink);
212+
213+
/**
214+
* Holds if data can flow from some source to `sink`.
215+
*/
216+
predicate hasFlowTo(Node sink);
217+
218+
/**
219+
* Holds if data can flow from some source to the expression `sink`.
220+
*/
221+
predicate hasFlowToExpr(DataFlowExpr sink);
222+
}
223+
224+
/**
225+
* Constructs a standard data flow computation.
226+
*/
227+
module Make<ConfigSig Config> implements DataFlowSig {
228+
private module C implements FullStateConfigSig {
229+
import DefaultState<Config>
230+
import Config
231+
}
232+
233+
import Impl<C>
234+
}
235+
236+
/**
237+
* Constructs a data flow computation using flow state.
238+
*/
239+
module MakeWithState<StateConfigSig Config> implements DataFlowSig {
240+
private module C implements FullStateConfigSig {
241+
import Config
242+
}
243+
244+
import Impl<C>
245+
}

0 commit comments

Comments
 (0)