@@ -21,53 +21,104 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
21
21
*/
22
22
cached
23
23
predicate localAdditionalTaintStep ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
24
- localInstructionTaintStep ( nodeFrom .asInstruction ( ) , nodeTo .asInstruction ( ) )
24
+ operandToInstructionTaintStep ( nodeFrom .asOperand ( ) , nodeTo .asInstruction ( ) )
25
25
or
26
- modeledTaintStep ( nodeFrom , nodeTo )
26
+ instructionToOperandTaintStep ( nodeFrom .asInstruction ( ) , nodeTo .asOperand ( ) )
27
+ }
28
+
29
+ private predicate instructionToOperandTaintStep ( Instruction fromInstr , Operand toOperand ) {
30
+ // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
31
+ // We only do this in certain cases:
32
+ // 1. The instruction's result must not be conflated, and
33
+ // 2. The instruction's result type is one of the types where we expect element-to-object flow. Currently
34
+ // this is array types and union types. This matches the other two cases of element-to-object flow in
35
+ // `DefaultTaintTracking`.
36
+ toOperand .getAnyDef ( ) = fromInstr and
37
+ not fromInstr .isResultConflated ( ) and
38
+ (
39
+ fromInstr .getResultType ( ) instanceof ArrayType or
40
+ fromInstr .getResultType ( ) instanceof Union
41
+ )
42
+ or
43
+ exists ( ReadSideEffectInstruction readInstr |
44
+ fromInstr = readInstr .getArgumentDef ( ) and
45
+ toOperand = readInstr .getSideEffectOperand ( )
46
+ )
47
+ or
48
+ toOperand .( LoadOperand ) .getAnyDef ( ) = fromInstr
27
49
}
28
50
29
51
/**
30
52
* Holds if taint propagates from `opFrom` to `instrTo` in exactly one local
31
53
* (intra-procedural) step.
32
54
*/
33
- private predicate localInstructionTaintStep ( Instruction nodeFrom , Instruction nodeTo ) {
55
+ private predicate operandToInstructionTaintStep ( Operand opFrom , Instruction instrTo ) {
34
56
// Taint can flow through expressions that alter the value but preserve
35
57
// more than one bit of it _or_ expressions that follow data through
36
58
// pointer indirections.
37
- nodeTo .getAnOperand ( ) . getAnyDef ( ) = nodeFrom and
59
+ instrTo .getAnOperand ( ) = opFrom and
38
60
(
39
- nodeTo instanceof ArithmeticInstruction
40
- or
41
- nodeTo instanceof BitwiseInstruction
61
+ instrTo instanceof ArithmeticInstruction
42
62
or
43
- nodeTo instanceof PointerArithmeticInstruction
63
+ instrTo instanceof BitwiseInstruction
44
64
or
45
- nodeTo instanceof FieldAddressInstruction
65
+ instrTo instanceof PointerArithmeticInstruction
46
66
or
47
67
// The `CopyInstruction` case is also present in non-taint data flow, but
48
68
// that uses `getDef` rather than `getAnyDef`. For taint, we want flow
49
69
// from a definition of `myStruct` to a `myStruct.myField` expression.
50
- nodeTo instanceof CopyInstruction
70
+ instrTo instanceof CopyInstruction
51
71
)
52
72
or
53
- nodeTo .( LoadInstruction ) .getSourceAddress ( ) = nodeFrom
54
- or
55
- // Flow through partial reads of arrays and unions
56
- nodeTo .( LoadInstruction ) .getSourceValueOperand ( ) .getAnyDef ( ) = nodeFrom and
57
- not nodeFrom .isResultConflated ( ) and
73
+ // Unary instructions tend to preserve enough information in practice that we
74
+ // want taint to flow through.
75
+ // The exception is `FieldAddressInstruction`. Together with the rules below for
76
+ // `LoadInstruction`s and `ChiInstruction`s, flow through `FieldAddressInstruction`
77
+ // could cause flow into one field to come out of an unrelated field.
78
+ // This would happen across function boundaries, where the IR would not be able to
79
+ // match loads to stores.
80
+ instrTo .( UnaryInstruction ) .getUnaryOperand ( ) = opFrom and
58
81
(
59
- nodeFrom .getResultType ( ) instanceof ArrayType or
60
- nodeFrom .getResultType ( ) instanceof Union
82
+ not instrTo instanceof FieldAddressInstruction
83
+ or
84
+ instrTo .( FieldAddressInstruction ) .getField ( ) .getDeclaringType ( ) instanceof Union
61
85
)
62
86
or
87
+ instrTo .( LoadInstruction ) .getSourceAddressOperand ( ) = opFrom
88
+ or
63
89
// Flow from an element to an array or union that contains it.
64
- nodeTo .( ChiInstruction ) .getPartial ( ) = nodeFrom and
65
- not nodeTo .isResultConflated ( ) and
66
- exists ( Type t | nodeTo .getResultLanguageType ( ) .hasType ( t , false ) |
90
+ instrTo .( ChiInstruction ) .getPartialOperand ( ) = opFrom and
91
+ not instrTo .isResultConflated ( ) and
92
+ exists ( Type t | instrTo .getResultLanguageType ( ) .hasType ( t , false ) |
67
93
t instanceof Union
68
94
or
69
95
t instanceof ArrayType
70
96
)
97
+ or
98
+ // Until we have flow through indirections across calls, we'll take flow out
99
+ // of the indirection and into the argument.
100
+ // When we get proper flow through indirections across calls, this code can be
101
+ // moved to `adjustedSink` or possibly into the `DataFlow::ExprNode` class.
102
+ exists ( ReadSideEffectInstruction read |
103
+ read .getSideEffectOperand ( ) = opFrom and
104
+ read .getArgumentDef ( ) = instrTo
105
+ )
106
+ or
107
+ // Until we have flow through indirections across calls, we'll take flow out
108
+ // of the parameter and into its indirection.
109
+ // `InitializeIndirectionInstruction` only has a single operand: the address of the
110
+ // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
111
+ // the IR looks like this:
112
+ // ```
113
+ // m1 = InitializeParameter[p] : &r1
114
+ // r2 = Load[p] : r2, m1
115
+ // m3 = InitializeIndirection[p] : &r2
116
+ // ```
117
+ // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
118
+ // `LoadOperand`'s overlap being exact.
119
+ instrTo .( InitializeIndirectionInstruction ) .getAnOperand ( ) = opFrom
120
+ or
121
+ modeledTaintStep ( opFrom , instrTo )
71
122
}
72
123
73
124
/**
@@ -110,17 +161,19 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
110
161
* Holds if taint can flow from `nodeIn` to `nodeOut` through a call to a
111
162
* modeled function.
112
163
*/
113
- predicate modeledTaintStep ( DataFlow :: Node nodeIn , DataFlow :: Node nodeOut ) {
164
+ predicate modeledTaintStep ( Operand nodeIn , Instruction nodeOut ) {
114
165
exists ( CallInstruction call , TaintFunction func , FunctionInput modelIn , FunctionOutput modelOut |
115
166
(
116
167
nodeIn = callInput ( call , modelIn )
117
168
or
118
169
exists ( int n |
119
- modelIn .isParameterDeref ( n ) and
120
- nodeIn = callInput ( call , any ( InParameter inParam | inParam .getIndex ( ) = n ) )
170
+ modelIn .isParameterDerefOrQualifierObject ( n ) and
171
+ if n = - 1
172
+ then nodeIn = callInput ( call , any ( InQualifierObject inQualifier ) )
173
+ else nodeIn = callInput ( call , any ( InParameter inParam | inParam .getIndex ( ) = n ) )
121
174
)
122
175
) and
123
- nodeOut . asInstruction ( ) = callOutput ( call , modelOut ) and
176
+ nodeOut = callOutput ( call , modelOut ) and
124
177
call .getStaticCallTarget ( ) = func and
125
178
func .hasTaintFlow ( modelIn , modelOut )
126
179
)
@@ -135,11 +188,29 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
135
188
int indexMid , InParameter modelMidIn , OutReturnValue modelOut
136
189
|
137
190
nodeIn = callInput ( call , modelIn ) and
138
- nodeOut . asInstruction ( ) = callOutput ( call , modelOut ) and
191
+ nodeOut = callOutput ( call , modelOut ) and
139
192
call .getStaticCallTarget ( ) = func and
140
193
func .( TaintFunction ) .hasTaintFlow ( modelIn , modelMidOut ) and
141
194
func .( DataFlowFunction ) .hasDataFlow ( modelMidIn , modelOut ) and
142
195
modelMidOut .isParameterDeref ( indexMid ) and
143
196
modelMidIn .isParameter ( indexMid )
144
197
)
198
+ or
199
+ // Taint flow from a pointer argument to an output, when the model specifies flow from the deref
200
+ // to that output, but the deref is not modeled in the IR for the caller.
201
+ exists (
202
+ CallInstruction call , ReadSideEffectInstruction read , Function func , FunctionInput modelIn ,
203
+ FunctionOutput modelOut
204
+ |
205
+ read .getSideEffectOperand ( ) = callInput ( call , modelIn ) and
206
+ read .getArgumentDef ( ) = nodeIn .getDef ( ) and
207
+ not read .getSideEffect ( ) .isResultModeled ( ) and
208
+ call .getStaticCallTarget ( ) = func and
209
+ (
210
+ func .( DataFlowFunction ) .hasDataFlow ( modelIn , modelOut )
211
+ or
212
+ func .( TaintFunction ) .hasTaintFlow ( modelIn , modelOut )
213
+ ) and
214
+ nodeOut = callOutput ( call , modelOut )
215
+ )
145
216
}
0 commit comments