Skip to content

Commit 20c956e

Browse files
authored
Merge pull request github#3320 from Semmle/rdmarsh/cpp/taint-tracking-util-port
C++: move logic from DefaultTaintTracking into TaintTrackingUtil
2 parents de08433 + 0dc797d commit 20c956e

File tree

4 files changed

+106
-0
lines changed

4 files changed

+106
-0
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* Provides predicates for mapping the `FunctionInput` and `FunctionOutput`
3+
* classes used in function models to the corresponding instructions.
4+
*/
5+
6+
private import semmle.code.cpp.ir.IR
7+
private import semmle.code.cpp.ir.dataflow.DataFlow
8+
9+
/**
10+
* Gets the instruction that goes into `input` for `call`.
11+
*/
12+
Instruction callInput(CallInstruction call, FunctionInput input) {
13+
// A positional argument
14+
exists(int index |
15+
result = call.getPositionalArgument(index) and
16+
input.isParameter(index)
17+
)
18+
or
19+
// A value pointed to by a positional argument
20+
exists(ReadSideEffectInstruction read |
21+
result = read and
22+
read.getPrimaryInstruction() = call and
23+
input.isParameterDeref(read.getIndex())
24+
)
25+
or
26+
// The qualifier pointer
27+
result = call.getThisArgument() and
28+
input.isQualifierAddress()
29+
//TODO: qualifier deref
30+
}
31+
32+
/**
33+
* Gets the instruction that holds the `output` for `call`.
34+
*/
35+
Instruction callOutput(CallInstruction call, FunctionOutput output) {
36+
// The return value
37+
result = call and
38+
output.isReturnValue()
39+
or
40+
// The side effect of a call on the value pointed to by a positional argument
41+
exists(WriteSideEffectInstruction effect |
42+
result = effect and
43+
effect.getPrimaryInstruction() = call and
44+
output.isParameterDeref(effect.getIndex())
45+
)
46+
// TODO: qualifiers, return value dereference
47+
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
private import semmle.code.cpp.ir.IR
22
private import semmle.code.cpp.ir.dataflow.DataFlow
3+
private import ModelUtil
4+
private import semmle.code.cpp.models.interfaces.DataFlow
5+
private import semmle.code.cpp.models.interfaces.SideEffect
36

47
/**
58
* Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local
@@ -45,6 +48,25 @@ private predicate localInstructionTaintStep(Instruction nodeFrom, Instruction no
4548
)
4649
or
4750
nodeTo.(LoadInstruction).getSourceAddress() = nodeFrom
51+
or
52+
modeledInstructionTaintStep(nodeFrom, nodeTo)
53+
or
54+
// Flow through partial reads of arrays and unions
55+
nodeTo.(LoadInstruction).getSourceValueOperand().getAnyDef() = nodeFrom and
56+
not nodeFrom.isResultConflated() and
57+
(
58+
nodeFrom.getResultType() instanceof ArrayType or
59+
nodeFrom.getResultType() instanceof Union
60+
)
61+
or
62+
// Flow from an element to an array or union that contains it.
63+
nodeTo.(ChiInstruction).getPartial() = nodeFrom and
64+
not nodeTo.isResultConflated() and
65+
exists(Type t | nodeTo.getResultLanguageType().hasType(t, false) |
66+
t instanceof Union
67+
or
68+
t instanceof ArrayType
69+
)
4870
}
4971

5072
/**
@@ -82,3 +104,34 @@ predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
82104
* but not in local taint.
83105
*/
84106
predicate defaultTaintBarrier(DataFlow::Node node) { none() }
107+
108+
/**
109+
* Holds if taint can flow from `instrIn` to `instrOut` through a call to a
110+
* modeled function.
111+
*/
112+
predicate modeledInstructionTaintStep(Instruction instrIn, Instruction instrOut) {
113+
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
114+
instrIn = callInput(call, modelIn) and
115+
instrOut = callOutput(call, modelOut) and
116+
call.getStaticCallTarget() = func and
117+
func.hasTaintFlow(modelIn, modelOut)
118+
)
119+
or
120+
// Taint flow from one argument to another and data flow from an argument to a
121+
// return value. This happens in functions like `strcat` and `memcpy`. We
122+
// could model this flow in two separate steps, but that would add reverse
123+
// flow from the write side-effect to the call instruction, which may not be
124+
// desirable.
125+
exists(
126+
CallInstruction call, Function func, FunctionInput modelIn, OutParameterDeref modelMidOut,
127+
int indexMid, InParameter modelMidIn, OutReturnValue modelOut
128+
|
129+
instrIn = callInput(call, modelIn) and
130+
instrOut = callOutput(call, modelOut) and
131+
call.getStaticCallTarget() = func and
132+
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
133+
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
134+
modelMidOut.isParameterDeref(indexMid) and
135+
modelMidIn.isParameter(indexMid)
136+
)
137+
}

cpp/ql/test/library-tests/dataflow/taint-tests/test_diff.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
| taint.cpp:93:11:93:11 | taint.cpp:71:22:71:27 | AST only |
2323
| taint.cpp:94:11:94:11 | taint.cpp:72:7:72:12 | AST only |
2424
| taint.cpp:109:7:109:13 | taint.cpp:105:12:105:17 | IR only |
25+
| taint.cpp:110:7:110:13 | taint.cpp:105:12:105:17 | IR only |
26+
| taint.cpp:111:7:111:13 | taint.cpp:106:12:106:17 | IR only |
27+
| taint.cpp:112:7:112:13 | taint.cpp:106:12:106:17 | IR only |
2528
| taint.cpp:130:7:130:9 | taint.cpp:127:8:127:13 | IR only |
2629
| taint.cpp:137:7:137:9 | taint.cpp:120:11:120:16 | AST only |
2730
| taint.cpp:173:8:173:13 | taint.cpp:164:19:164:24 | AST only |

cpp/ql/test/library-tests/dataflow/taint-tests/test_ir.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
| taint.cpp:16:8:16:14 | source1 | taint.cpp:12:22:12:27 | call to source |
55
| taint.cpp:17:8:17:16 | ++ ... | taint.cpp:12:22:12:27 | call to source |
66
| taint.cpp:109:7:109:13 | access to array | taint.cpp:105:12:105:17 | call to source |
7+
| taint.cpp:110:7:110:13 | access to array | taint.cpp:105:12:105:17 | call to source |
8+
| taint.cpp:111:7:111:13 | access to array | taint.cpp:106:12:106:17 | call to source |
9+
| taint.cpp:112:7:112:13 | access to array | taint.cpp:106:12:106:17 | call to source |
710
| taint.cpp:129:7:129:9 | * ... | taint.cpp:120:11:120:16 | call to source |
811
| taint.cpp:130:7:130:9 | * ... | taint.cpp:127:8:127:13 | call to source |
912
| taint.cpp:134:7:134:9 | * ... | taint.cpp:120:11:120:16 | call to source |

0 commit comments

Comments
 (0)