Skip to content

Commit b48fe6a

Browse files
authored
Merge pull request github#3123 from jbj/dataflow-indirect-args
C++: Wire up param/arg indirections in data flow
2 parents 5cdc29e + df96f8e commit b48fe6a

File tree

19 files changed

+263
-38
lines changed

19 files changed

+263
-38
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ private DataFlow::Node getNodeForSource(Expr source) {
6767
// to `gets`. It's impossible here to tell which is which, but the "access
6868
// to argv" source is definitely not intended to match an output argument,
6969
// and it causes false positives if we let it.
70+
//
71+
// This case goes together with the similar (but not identical) rule in
72+
// `nodeIsBarrierIn`.
7073
result = DataFlow::definitionByReferenceNode(source) and
7174
not argv(source.(VariableAccess).getTarget())
7275
)
@@ -202,7 +205,13 @@ private predicate nodeIsBarrier(DataFlow::Node node) {
202205

203206
private predicate nodeIsBarrierIn(DataFlow::Node node) {
204207
// don't use dataflow into taint sources, as this leads to duplicate results.
205-
node = getNodeForSource(any(Expr e))
208+
exists(Expr source | isUserInput(source, _) |
209+
node = DataFlow::exprNode(source)
210+
or
211+
// This case goes together with the similar (but not identical) rule in
212+
// `getNodeForSource`.
213+
node = DataFlow::definitionByReferenceNode(source)
214+
)
206215
}
207216

208217
cached

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,28 @@ private import DataFlowDispatch
88
* to the callable. Instance arguments (`this` pointer) are also included.
99
*/
1010
class ArgumentNode extends InstructionNode {
11-
ArgumentNode() { exists(CallInstruction call | this.getInstruction() = call.getAnArgument()) }
11+
ArgumentNode() {
12+
exists(CallInstruction call |
13+
instr = call.getAnArgument()
14+
or
15+
instr.(ReadSideEffectInstruction).getPrimaryInstruction() = call
16+
)
17+
}
1218

1319
/**
1420
* Holds if this argument occurs at the given position in the given call.
1521
* The instance argument is considered to have index `-1`.
1622
*/
1723
predicate argumentOf(DataFlowCall call, int pos) {
18-
this.getInstruction() = call.getPositionalArgument(pos)
24+
instr = call.getPositionalArgument(pos)
1925
or
20-
this.getInstruction() = call.getThisArgument() and pos = -1
26+
instr = call.getThisArgument() and pos = -1
27+
or
28+
exists(ReadSideEffectInstruction read |
29+
read = instr and
30+
read.getPrimaryInstruction() = call and
31+
pos = getArgumentPosOfSideEffect(read.getIndex())
32+
)
2133
}
2234

2335
/** Gets the call in which this node is an argument. */

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 99 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class Node extends TIRDataFlowNode {
5454
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
5555
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
5656

57-
/** Gets the parameter corresponding to this node, if any. */
57+
/** Gets the positional parameter corresponding to this node, if any. */
5858
Parameter asParameter() { result = this.(ExplicitParameterNode).getParameter() }
5959

6060
/**
@@ -156,44 +156,90 @@ class ExprNode extends InstructionNode {
156156
}
157157

158158
/**
159-
* A node representing a `Parameter`. This includes both explicit parameters such
160-
* as `x` in `f(x)` and implicit parameters such as `this` in `x.f()`
159+
* INTERNAL: do not use. Translates a parameter/argument index into a negative
160+
* number that denotes the index of its side effect (pointer indirection).
161+
*/
162+
bindingset[index]
163+
int getArgumentPosOfSideEffect(int index) {
164+
// -1 -> -2
165+
// 0 -> -3
166+
// 1 -> -4
167+
// ...
168+
result = -3 - index
169+
}
170+
171+
/**
172+
* The value of a parameter at function entry, viewed as a node in a data
173+
* flow graph. This includes both explicit parameters such as `x` in `f(x)`
174+
* and implicit parameters such as `this` in `x.f()`.
175+
*
176+
* To match a specific kind of parameter, consider using one of the subclasses
177+
* `ExplicitParameterNode`, `ThisParameterNode`, or
178+
* `ParameterIndirectionNode`.
161179
*/
162180
class ParameterNode extends InstructionNode {
163-
override InitializeParameterInstruction instr;
181+
ParameterNode() {
182+
// To avoid making this class abstract, we enumerate its values here
183+
instr instanceof InitializeParameterInstruction
184+
or
185+
instr instanceof InitializeIndirectionInstruction
186+
}
164187

165188
/**
166-
* Holds if this node is the parameter of `c` at the specified (zero-based)
167-
* position. The implicit `this` parameter is considered to have index `-1`.
189+
* Holds if this node is the parameter of `f` at the specified position. The
190+
* implicit `this` parameter is considered to have position `-1`, and
191+
* pointer-indirection parameters are at further negative positions.
168192
*/
169-
predicate isParameterOf(Function f, int i) { none() } // overriden by subclasses
193+
predicate isParameterOf(Function f, int pos) { none() } // overridden by subclasses
170194
}
171195

172-
/**
173-
* The value of a parameter at function entry, viewed as a node in a data
174-
* flow graph.
175-
*/
196+
/** An explicit positional parameter, not including `this` or `...`. */
176197
private class ExplicitParameterNode extends ParameterNode {
198+
override InitializeParameterInstruction instr;
199+
177200
ExplicitParameterNode() { exists(instr.getParameter()) }
178201

179-
override predicate isParameterOf(Function f, int i) { f.getParameter(i) = instr.getParameter() }
202+
override predicate isParameterOf(Function f, int pos) {
203+
f.getParameter(pos) = instr.getParameter()
204+
}
180205

181-
/** Gets the parameter corresponding to this node. */
206+
/** Gets the `Parameter` associated with this node. */
182207
Parameter getParameter() { result = instr.getParameter() }
183208

184209
override string toString() { result = instr.getParameter().toString() }
185210
}
186211

187-
private class ThisParameterNode extends ParameterNode {
212+
/** An implicit `this` parameter. */
213+
class ThisParameterNode extends ParameterNode {
214+
override InitializeParameterInstruction instr;
215+
188216
ThisParameterNode() { instr.getIRVariable() instanceof IRThisVariable }
189217

190-
override predicate isParameterOf(Function f, int i) {
191-
i = -1 and instr.getEnclosingFunction() = f
218+
override predicate isParameterOf(Function f, int pos) {
219+
pos = -1 and instr.getEnclosingFunction() = f
192220
}
193221

194222
override string toString() { result = "this" }
195223
}
196224

225+
/** A synthetic parameter to model the pointed-to object of a pointer parameter. */
226+
class ParameterIndirectionNode extends ParameterNode {
227+
override InitializeIndirectionInstruction instr;
228+
229+
override predicate isParameterOf(Function f, int pos) {
230+
exists(int index |
231+
f.getParameter(index) = instr.getParameter()
232+
or
233+
index = -1 and
234+
instr.getIRVariable().(IRThisVariable).getEnclosingFunction() = f
235+
|
236+
pos = getArgumentPosOfSideEffect(index)
237+
)
238+
}
239+
240+
override string toString() { result = "*" + instr.getIRVariable().toString() }
241+
}
242+
197243
/**
198244
* DEPRECATED: Data flow was never an accurate way to determine what
199245
* expressions might be uninitialized. It errs on the side of saying that
@@ -341,6 +387,18 @@ class DefinitionByReferenceNode extends InstructionNode {
341387
}
342388
}
343389

390+
/**
391+
* A node representing the memory pointed to by a function argument.
392+
*
393+
* This class exists only in order to override `toString`, which would
394+
* otherwise be the default implementation inherited from `InstructionNode`.
395+
*/
396+
private class ArgumentIndirectionNode extends InstructionNode {
397+
override ReadSideEffectInstruction instr;
398+
399+
override string toString() { result = "Argument " + instr.getIndex() + " indirection" }
400+
}
401+
344402
/**
345403
* A `Node` corresponding to a variable in the program, as opposed to the
346404
* value of that variable at some particular point. This can be used for
@@ -442,6 +500,31 @@ private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction
442500
or
443501
iTo.(PhiInstruction).getAnOperand().getDef() = iFrom
444502
or
503+
// A read side effect is almost never exact since we don't know exactly how
504+
// much memory the callee will read.
505+
iTo.(ReadSideEffectInstruction).getSideEffectOperand().getAnyDef() = iFrom and
506+
not iFrom.isResultConflated()
507+
or
508+
// Loading a single `int` from an `int *` parameter is not an exact load since
509+
// the parameter may point to an entire array rather than a single `int`. The
510+
// following rule ensures that any flow going into the
511+
// `InitializeIndirectionInstruction`, even if it's for a different array
512+
// element, will propagate to a load of the first element.
513+
//
514+
// Since we're linking `InitializeIndirectionInstruction` and
515+
// `LoadInstruction` together directly, this rule will break if there's any
516+
// reassignment of the parameter indirection, including a conditional one that
517+
// leads to a phi node.
518+
exists(InitializeIndirectionInstruction init |
519+
iFrom = init and
520+
iTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = init and
521+
// Check that the types match. Otherwise we can get flow from an object to
522+
// its fields, which leads to field conflation when there's flow from other
523+
// fields to the object elsewhere.
524+
init.getParameter().getType().getUnspecifiedType().(DerivedType).getBaseType() =
525+
iTo.getResultType().getUnspecifiedType()
526+
)
527+
or
445528
// Treat all conversions as flow, even conversions between different numeric types.
446529
iTo.(ConvertInstruction).getUnary() = iFrom
447530
or

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,35 @@ void test_conflated_fields3() {
117117
taint_y(&xy);
118118
sink(xy.x); // not tainted
119119
}
120+
121+
struct Point {
122+
int x;
123+
int y;
124+
125+
void callSink() {
126+
sink(this->x); // tainted
127+
sink(this->y); // not tainted
128+
}
129+
};
130+
131+
void test_conflated_fields1() {
132+
Point p;
133+
p.x = getenv("VAR")[0];
134+
sink(p.x); // tainted
135+
sink(p.y); // not tainted
136+
p.callSink();
137+
}
138+
139+
void taint_x(Point *pp) {
140+
pp->x = getenv("VAR")[0];
141+
}
142+
143+
void y_to_sink(Point *pp) {
144+
sink(pp->y); // not tainted
145+
}
146+
147+
void test_conflated_fields2() {
148+
Point p;
149+
taint_x(&p);
150+
y_to_sink(&p);
151+
}

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,18 @@
103103
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:110:17:110:32 | (int)... |
104104
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:110:17:110:32 | access to array |
105105
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:111:12:111:18 | tainted |
106+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:126:16:126:16 | x |
107+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:9:133:14 | call to getenv |
108+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:9:133:24 | (int)... |
109+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:9:133:24 | access to array |
110+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:134:10:134:10 | x |
111+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | shared.h:6:15:6:23 | sinkparam |
112+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:11:140:16 | call to getenv |
113+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:11:140:26 | (int)... |
114+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:11:140:26 | access to array |
115+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:143:23:143:24 | pp |
116+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:144:8:144:9 | pp |
117+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:150:13:150:14 | & ... |
106118
| dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:24:28:27 | call to atoi |
107119
| dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:29:28:34 | call to getenv |
108120
| dispatch.cpp:28:29:28:34 | call to getenv | dispatch.cpp:28:29:28:45 | (const char *)... |

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/test_diff.expected

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@
2121
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | defaulttainttracking.cpp:98:10:98:11 | p2 | IR only |
2222
| defaulttainttracking.cpp:97:27:97:32 | call to getenv | shared.h:5:23:5:31 | sinkparam | IR only |
2323
| defaulttainttracking.cpp:110:17:110:22 | call to getenv | defaulttainttracking.cpp:111:8:111:8 | y | AST only |
24+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:126:16:126:16 | x | IR only |
25+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:133:5:133:5 | x | AST only |
26+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | defaulttainttracking.cpp:134:10:134:10 | x | IR only |
27+
| defaulttainttracking.cpp:133:9:133:14 | call to getenv | shared.h:6:15:6:23 | sinkparam | IR only |
28+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:140:7:140:7 | x | AST only |
29+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:143:23:143:24 | pp | IR only |
30+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:144:8:144:9 | pp | IR only |
31+
| defaulttainttracking.cpp:140:11:140:16 | call to getenv | defaulttainttracking.cpp:150:13:150:14 | & ... | IR only |
2432
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:13:5:13:11 | global1 | AST only |
2533
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:23:5:23:11 | global2 | AST only |
2634
| test_diff.cpp:104:12:104:15 | argv | test_diff.cpp:104:11:104:20 | (...) | IR only |

cpp/ql/test/library-tests/dataflow/dataflow-tests/clang.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void following_pointers(
2727

2828
sourceStruct1_ptr->m1 = source();
2929
sink(sourceStruct1_ptr->m1); // flow
30-
sink(sourceStruct1_ptr->getFirst()); // flow [NOT DETECTED with IR]
30+
sink(sourceStruct1_ptr->getFirst()); // flow
3131
sink(sourceStruct1_ptr->m2); // no flow
3232
sink(sourceStruct1.m1); // no flow
3333

cpp/ql/test/library-tests/dataflow/dataflow-tests/test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ class FlowThroughFields {
366366
}
367367

368368
int calledAfterTaint() {
369-
sink(field); // tainted [NOT DETECTED with IR]
369+
sink(field); // tainted
370370
}
371371

372372
int taintAndCall() {

cpp/ql/test/library-tests/dataflow/dataflow-tests/test_diff.expected

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
| BarrierGuard.cpp:49:10:49:15 | BarrierGuard.cpp:51:13:51:13 | AST only |
22
| BarrierGuard.cpp:60:11:60:16 | BarrierGuard.cpp:62:14:62:14 | AST only |
33
| clang.cpp:12:9:12:20 | clang.cpp:22:8:22:20 | AST only |
4-
| clang.cpp:28:27:28:32 | clang.cpp:30:27:30:34 | AST only |
54
| clang.cpp:39:42:39:47 | clang.cpp:41:18:41:19 | IR only |
65
| dispatch.cpp:16:37:16:42 | dispatch.cpp:32:16:32:24 | IR only |
76
| dispatch.cpp:16:37:16:42 | dispatch.cpp:40:15:40:23 | IR only |
@@ -19,11 +18,7 @@
1918
| dispatch.cpp:144:8:144:13 | dispatch.cpp:96:8:96:8 | IR only |
2019
| globals.cpp:13:23:13:28 | globals.cpp:12:10:12:24 | IR only |
2120
| globals.cpp:23:23:23:28 | globals.cpp:19:10:19:24 | IR only |
22-
| lambdas.cpp:8:10:8:15 | lambdas.cpp:14:3:14:6 | AST only |
23-
| lambdas.cpp:8:10:8:15 | lambdas.cpp:18:8:18:8 | AST only |
2421
| lambdas.cpp:8:10:8:15 | lambdas.cpp:21:3:21:6 | AST only |
25-
| lambdas.cpp:8:10:8:15 | lambdas.cpp:29:3:29:6 | AST only |
26-
| lambdas.cpp:8:10:8:15 | lambdas.cpp:41:8:41:8 | AST only |
2722
| lambdas.cpp:43:7:43:12 | lambdas.cpp:46:7:46:7 | AST only |
2823
| ref.cpp:29:11:29:16 | ref.cpp:62:10:62:11 | AST only |
2924
| ref.cpp:53:9:53:10 | ref.cpp:56:10:56:11 | AST only |
@@ -39,7 +34,6 @@
3934
| test.cpp:109:9:109:14 | test.cpp:110:10:110:12 | IR only |
4035
| test.cpp:347:17:347:22 | test.cpp:349:10:349:18 | AST only |
4136
| test.cpp:359:13:359:18 | test.cpp:365:10:365:14 | AST only |
42-
| test.cpp:373:13:373:18 | test.cpp:369:10:369:14 | AST only |
4337
| test.cpp:399:7:399:9 | test.cpp:401:8:401:10 | AST only |
4438
| test.cpp:405:7:405:9 | test.cpp:408:8:408:10 | AST only |
4539
| test.cpp:416:7:416:11 | test.cpp:418:8:418:12 | AST only |

cpp/ql/test/library-tests/dataflow/dataflow-tests/test_ir.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
| clang.cpp:18:8:18:19 | (const int *)... | clang.cpp:12:9:12:20 | sourceArray1 |
1313
| clang.cpp:18:8:18:19 | sourceArray1 | clang.cpp:12:9:12:20 | sourceArray1 |
1414
| clang.cpp:29:27:29:28 | m1 | clang.cpp:28:27:28:32 | call to source |
15+
| clang.cpp:30:27:30:34 | call to getFirst | clang.cpp:28:27:28:32 | call to source |
1516
| clang.cpp:37:10:37:11 | m2 | clang.cpp:34:32:34:37 | call to source |
1617
| clang.cpp:41:18:41:19 | m2 | clang.cpp:39:42:39:47 | call to source |
1718
| clang.cpp:45:17:45:18 | m2 | clang.cpp:43:35:43:40 | call to source |
@@ -38,7 +39,11 @@
3839
| globals.cpp:6:10:6:14 | local | globals.cpp:5:17:5:22 | call to source |
3940
| globals.cpp:12:10:12:24 | flowTestGlobal1 | globals.cpp:13:23:13:28 | call to source |
4041
| globals.cpp:19:10:19:24 | flowTestGlobal2 | globals.cpp:23:23:23:28 | call to source |
42+
| lambdas.cpp:14:3:14:6 | t | lambdas.cpp:8:10:8:15 | call to source |
43+
| lambdas.cpp:18:8:18:8 | call to operator() | lambdas.cpp:8:10:8:15 | call to source |
44+
| lambdas.cpp:29:3:29:6 | t | lambdas.cpp:8:10:8:15 | call to source |
4145
| lambdas.cpp:35:8:35:8 | a | lambdas.cpp:8:10:8:15 | call to source |
46+
| lambdas.cpp:41:8:41:8 | (reference dereference) | lambdas.cpp:8:10:8:15 | call to source |
4247
| ref.cpp:123:13:123:15 | val | ref.cpp:122:23:122:28 | call to source |
4348
| ref.cpp:126:13:126:15 | val | ref.cpp:125:19:125:24 | call to source |
4449
| ref.cpp:129:13:129:15 | val | ref.cpp:94:15:94:20 | call to source |
@@ -65,6 +70,7 @@
6570
| test.cpp:266:12:266:12 | x | test.cpp:265:22:265:27 | call to source |
6671
| test.cpp:289:14:289:14 | x | test.cpp:305:17:305:22 | call to source |
6772
| test.cpp:318:7:318:7 | x | test.cpp:314:4:314:9 | call to source |
73+
| test.cpp:369:10:369:14 | field | test.cpp:373:13:373:18 | call to source |
6874
| test.cpp:375:10:375:14 | field | test.cpp:373:13:373:18 | call to source |
6975
| test.cpp:385:8:385:10 | tmp | test.cpp:382:48:382:54 | source1 |
7076
| test.cpp:392:8:392:10 | tmp | test.cpp:388:53:388:59 | source1 |

0 commit comments

Comments
 (0)