Skip to content

Commit 7f16c52

Browse files
authored
Merge pull request github#3364 from github/rdmarsh/cpp/use-taint-configuration-dtt
C++: use TaintTracking::Configuration in DefaultTaintTracking
2 parents 1dbfe23 + c7c6573 commit 7f16c52

File tree

35 files changed

+788
-409
lines changed

35 files changed

+788
-409
lines changed

config/identical-files.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"cpp/ql/src/semmle/code/cpp/dataflow/internal/tainttracking2/TaintTrackingImpl.qll",
3737
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
3838
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll",
39+
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll",
3940
"csharp/ql/src/semmle/code/csharp/dataflow/internal/tainttracking1/TaintTrackingImpl.qll",
4041
"csharp/ql/src/semmle/code/csharp/dataflow/internal/tainttracking2/TaintTrackingImpl.qll",
4142
"csharp/ql/src/semmle/code/csharp/dataflow/internal/tainttracking3/TaintTrackingImpl.qll",

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 59 additions & 223 deletions
Large diffs are not rendered by default.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* Provides a `TaintTracking3` module, which is a copy of the `TaintTracking`
3+
* module. Use this class when data-flow configurations or taint-tracking
4+
* configurations must depend on each other. Two classes extending
5+
* `DataFlow::Configuration` should never depend on each other, but one of them
6+
* should instead depend on a `DataFlow2::Configuration`, a
7+
* `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The
8+
* `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and
9+
* `TaintTracking2::Configuration` extends `DataFlow2::Configuration`.
10+
*
11+
* See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation.
12+
*/
13+
module TaintTracking3 {
14+
import semmle.code.cpp.ir.dataflow.internal.tainttracking3.TaintTrackingImpl
15+
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,30 +9,18 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
99
/**
1010
* Gets the instruction that goes into `input` for `call`.
1111
*/
12-
DataFlow::Node callInput(CallInstruction call, FunctionInput input) {
13-
// A positional argument
12+
Operand callInput(CallInstruction call, FunctionInput input) {
13+
// An argument or qualifier
1414
exists(int index |
15-
result.asInstruction() = call.getPositionalArgument(index) and
16-
input.isParameter(index)
15+
result = call.getArgumentOperand(index) and
16+
input.isParameterOrQualifierAddress(index)
1717
)
1818
or
19-
// A value pointed to by a positional argument
19+
// A value pointed to by an argument or qualifier
2020
exists(ReadSideEffectInstruction read |
21-
result.asOperand() = read.getSideEffectOperand() and
21+
result = read.getSideEffectOperand() and
2222
read.getPrimaryInstruction() = call and
23-
input.isParameterDeref(read.getIndex())
24-
)
25-
or
26-
// The qualifier pointer
27-
result.asInstruction() = call.getThisArgument() and
28-
input.isQualifierAddress()
29-
or
30-
// The qualifier object
31-
exists(ReadSideEffectInstruction read |
32-
result.asOperand() = read.getSideEffectOperand() and
33-
read.getPrimaryInstruction() = call and
34-
read.getIndex() = -1 and
35-
input.isQualifierObject()
23+
input.isParameterDerefOrQualifierObject(read.getIndex())
3624
)
3725
}
3826

@@ -44,19 +32,11 @@ Instruction callOutput(CallInstruction call, FunctionOutput output) {
4432
result = call and
4533
output.isReturnValue()
4634
or
47-
// The side effect of a call on the value pointed to by a positional argument
48-
exists(WriteSideEffectInstruction effect |
49-
result = effect and
50-
effect.getPrimaryInstruction() = call and
51-
output.isParameterDeref(effect.getIndex())
52-
)
53-
or
54-
// The side effect of a call on the qualifier object
35+
// The side effect of a call on the value pointed to by an argument or qualifier
5536
exists(WriteSideEffectInstruction effect |
5637
result = effect and
5738
effect.getPrimaryInstruction() = call and
58-
effect.getIndex() = -1 and
59-
output.isQualifierObject()
39+
output.isParameterDerefOrQualifierObject(effect.getIndex())
6040
)
6141
// TODO: return value dereference
6242
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 96 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,53 +21,104 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
2121
*/
2222
cached
2323
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
24-
localInstructionTaintStep(nodeFrom.asInstruction(), nodeTo.asInstruction())
24+
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
2525
or
26-
modeledTaintStep(nodeFrom, nodeTo)
26+
instructionToOperandTaintStep(nodeFrom.asInstruction(), nodeTo.asOperand())
27+
}
28+
29+
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
30+
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
31+
// We only do this in certain cases:
32+
// 1. The instruction's result must not be conflated, and
33+
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
34+
// this is array types and union types. This matches the other two cases of element-to-object flow in
35+
// `DefaultTaintTracking`.
36+
toOperand.getAnyDef() = fromInstr and
37+
not fromInstr.isResultConflated() and
38+
(
39+
fromInstr.getResultType() instanceof ArrayType or
40+
fromInstr.getResultType() instanceof Union
41+
)
42+
or
43+
exists(ReadSideEffectInstruction readInstr |
44+
fromInstr = readInstr.getArgumentDef() and
45+
toOperand = readInstr.getSideEffectOperand()
46+
)
47+
or
48+
toOperand.(LoadOperand).getAnyDef() = fromInstr
2749
}
2850

2951
/**
3052
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
3153
* (intra-procedural) step.
3254
*/
33-
private predicate localInstructionTaintStep(Instruction nodeFrom, Instruction nodeTo) {
55+
private predicate operandToInstructionTaintStep(Operand opFrom, Instruction instrTo) {
3456
// Taint can flow through expressions that alter the value but preserve
3557
// more than one bit of it _or_ expressions that follow data through
3658
// pointer indirections.
37-
nodeTo.getAnOperand().getAnyDef() = nodeFrom and
59+
instrTo.getAnOperand() = opFrom and
3860
(
39-
nodeTo instanceof ArithmeticInstruction
40-
or
41-
nodeTo instanceof BitwiseInstruction
61+
instrTo instanceof ArithmeticInstruction
4262
or
43-
nodeTo instanceof PointerArithmeticInstruction
63+
instrTo instanceof BitwiseInstruction
4464
or
45-
nodeTo instanceof FieldAddressInstruction
65+
instrTo instanceof PointerArithmeticInstruction
4666
or
4767
// The `CopyInstruction` case is also present in non-taint data flow, but
4868
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
4969
// from a definition of `myStruct` to a `myStruct.myField` expression.
50-
nodeTo instanceof CopyInstruction
70+
instrTo instanceof CopyInstruction
5171
)
5272
or
53-
nodeTo.(LoadInstruction).getSourceAddress() = nodeFrom
54-
or
55-
// Flow through partial reads of arrays and unions
56-
nodeTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = nodeFrom and
57-
not nodeFrom.isResultConflated() and
73+
// Unary instructions tend to preserve enough information in practice that we
74+
// want taint to flow through.
75+
// The exception is `FieldAddressInstruction`. Together with the rules below for
76+
// `LoadInstruction`s and `ChiInstruction`s, flow through `FieldAddressInstruction`
77+
// could cause flow into one field to come out an unrelated field.
78+
// This would happen across function boundaries, where the IR would not be able to
79+
// match loads to stores.
80+
instrTo.(UnaryInstruction).getUnaryOperand() = opFrom and
5881
(
59-
nodeFrom.getResultType() instanceof ArrayType or
60-
nodeFrom.getResultType() instanceof Union
82+
not instrTo instanceof FieldAddressInstruction
83+
or
84+
instrTo.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
6185
)
6286
or
87+
instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom
88+
or
6389
// Flow from an element to an array or union that contains it.
64-
nodeTo.(ChiInstruction).getPartial() = nodeFrom and
65-
not nodeTo.isResultConflated() and
66-
exists(Type t | nodeTo.getResultLanguageType().hasType(t, false) |
90+
instrTo.(ChiInstruction).getPartialOperand() = opFrom and
91+
not instrTo.isResultConflated() and
92+
exists(Type t | instrTo.getResultLanguageType().hasType(t, false) |
6793
t instanceof Union
6894
or
6995
t instanceof ArrayType
7096
)
97+
or
98+
// Until we have flow through indirections across calls, we'll take flow out
99+
// of the indirection and into the argument.
100+
// When we get proper flow through indirections across calls, this code can be
101+
// moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
102+
exists(ReadSideEffectInstruction read |
103+
read.getSideEffectOperand() = opFrom and
104+
read.getArgumentDef() = instrTo
105+
)
106+
or
107+
// Until we have from through indirections across calls, we'll take flow out
108+
// of the parameter and into its indirection.
109+
// `InitializeIndirectionInstruction` only has a single operand: the address of the
110+
// value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
111+
// the IR looks like this:
112+
// ```
113+
// m1 = InitializeParameter[p] : &r1
114+
// r2 = Load[p] : r2, m1
115+
// m3 = InitializeIndirection[p] : &r2
116+
// ```
117+
// So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
118+
// `LoadOperand`'s overlap being exact.
119+
instrTo.(InitializeIndirectionInstruction).getAnOperand() = opFrom
120+
or
121+
modeledTaintStep(opFrom, instrTo)
71122
}
72123

73124
/**
@@ -110,17 +161,19 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
110161
* Holds if taint can flow from `instrIn` to `instrOut` through a call to a
111162
* modeled function.
112163
*/
113-
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
164+
predicate modeledTaintStep(Operand nodeIn, Instruction nodeOut) {
114165
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
115166
(
116167
nodeIn = callInput(call, modelIn)
117168
or
118169
exists(int n |
119-
modelIn.isParameterDeref(n) and
120-
nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
170+
modelIn.isParameterDerefOrQualifierObject(n) and
171+
if n = -1
172+
then nodeIn = callInput(call, any(InQualifierObject inQualifier))
173+
else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n))
121174
)
122175
) and
123-
nodeOut.asInstruction() = callOutput(call, modelOut) and
176+
nodeOut = callOutput(call, modelOut) and
124177
call.getStaticCallTarget() = func and
125178
func.hasTaintFlow(modelIn, modelOut)
126179
)
@@ -135,11 +188,29 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
135188
int indexMid, InParameter modelMidIn, OutReturnValue modelOut
136189
|
137190
nodeIn = callInput(call, modelIn) and
138-
nodeOut.asInstruction() = callOutput(call, modelOut) and
191+
nodeOut = callOutput(call, modelOut) and
139192
call.getStaticCallTarget() = func and
140193
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
141194
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
142195
modelMidOut.isParameterDeref(indexMid) and
143196
modelMidIn.isParameter(indexMid)
144197
)
198+
or
199+
// Taint flow from a pointer argument to an output, when the model specifies flow from the deref
200+
// to that output, but the deref is not modeled in the IR for the caller.
201+
exists(
202+
CallInstruction call, ReadSideEffectInstruction read, Function func, FunctionInput modelIn,
203+
FunctionOutput modelOut
204+
|
205+
read.getSideEffectOperand() = callInput(call, modelIn) and
206+
read.getArgumentDef() = nodeIn.getDef() and
207+
not read.getSideEffect().isResultModeled() and
208+
call.getStaticCallTarget() = func and
209+
(
210+
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
211+
or
212+
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
213+
) and
214+
nodeOut = callOutput(call, modelOut)
215+
)
145216
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/**
2+
* Provides an implementation of global (interprocedural) taint tracking.
3+
* This file re-exports the local (intraprocedural) taint-tracking analysis
4+
* from `TaintTrackingParameter::Public` and adds a global analysis, mainly
5+
* exposed through the `Configuration` class. For some languages, this file
6+
* exists in several identical copies, allowing queries to use multiple
7+
* `Configuration` classes that depend on each other without introducing
8+
* mutual recursion among those configurations.
9+
*/
10+
11+
import TaintTrackingParameter::Public
12+
private import TaintTrackingParameter::Private
13+
14+
/**
15+
* A configuration of interprocedural taint tracking analysis. This defines
16+
* sources, sinks, and any other configurable aspect of the analysis. Each
17+
* use of the taint tracking library must define its own unique extension of
18+
* this abstract class.
19+
*
20+
* A taint-tracking configuration is a special data flow configuration
21+
* (`DataFlow::Configuration`) that allows for flow through nodes that do not
22+
* necessarily preserve values but are still relevant from a taint tracking
23+
* perspective. (For example, string concatenation, where one of the operands
24+
* is tainted.)
25+
*
26+
* To create a configuration, extend this class with a subclass whose
27+
* characteristic predicate is a unique singleton string. For example, write
28+
*
29+
* ```ql
30+
* class MyAnalysisConfiguration extends TaintTracking::Configuration {
31+
* MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
32+
* // Override `isSource` and `isSink`.
33+
* // Optionally override `isSanitizer`.
34+
* // Optionally override `isSanitizerIn`.
35+
* // Optionally override `isSanitizerOut`.
36+
* // Optionally override `isSanitizerGuard`.
37+
* // Optionally override `isAdditionalTaintStep`.
38+
* }
39+
* ```
40+
*
41+
* Then, to query whether there is flow between some `source` and `sink`,
42+
* write
43+
*
44+
* ```ql
45+
* exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
46+
* ```
47+
*
48+
* Multiple configurations can coexist, but it is unsupported to depend on
49+
* another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the
50+
* overridden predicates that define sources, sinks, or additional steps.
51+
* Instead, the dependency should go to a `TaintTracking2::Configuration` or a
52+
* `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc.
53+
*/
54+
abstract class Configuration extends DataFlow::Configuration {
55+
bindingset[this]
56+
Configuration() { any() }
57+
58+
/**
59+
* Holds if `source` is a relevant taint source.
60+
*
61+
* The smaller this predicate is, the faster `hasFlow()` will converge.
62+
*/
63+
// overridden to provide taint-tracking specific qldoc
64+
abstract override predicate isSource(DataFlow::Node source);
65+
66+
/**
67+
* Holds if `sink` is a relevant taint sink.
68+
*
69+
* The smaller this predicate is, the faster `hasFlow()` will converge.
70+
*/
71+
// overridden to provide taint-tracking specific qldoc
72+
abstract override predicate isSink(DataFlow::Node sink);
73+
74+
/** Holds if the node `node` is a taint sanitizer. */
75+
predicate isSanitizer(DataFlow::Node node) { none() }
76+
77+
final override predicate isBarrier(DataFlow::Node node) {
78+
isSanitizer(node) or
79+
defaultTaintSanitizer(node)
80+
}
81+
82+
/** Holds if taint propagation into `node` is prohibited. */
83+
predicate isSanitizerIn(DataFlow::Node node) { none() }
84+
85+
final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) }
86+
87+
/** Holds if taint propagation out of `node` is prohibited. */
88+
predicate isSanitizerOut(DataFlow::Node node) { none() }
89+
90+
final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) }
91+
92+
/** Holds if taint propagation through nodes guarded by `guard` is prohibited. */
93+
predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() }
94+
95+
final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { isSanitizerGuard(guard) }
96+
97+
/**
98+
* Holds if the additional taint propagation step from `node1` to `node2`
99+
* must be taken into account in the analysis.
100+
*/
101+
predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() }
102+
103+
final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
104+
isAdditionalTaintStep(node1, node2) or
105+
defaultAdditionalTaintStep(node1, node2)
106+
}
107+
108+
/**
109+
* Holds if taint may flow from `source` to `sink` for this configuration.
110+
*/
111+
// overridden to provide taint-tracking specific qldoc
112+
override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) {
113+
super.hasFlow(source, sink)
114+
}
115+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public
2+
3+
module Private {
4+
import semmle.code.cpp.ir.dataflow.DataFlow3::DataFlow3 as DataFlow
5+
}

0 commit comments

Comments
 (0)