Skip to content

Commit 74ae2e0

Browse files
authored
Merge pull request github#5773 from hvitved/dataflow/aggressive-caching
Data flow: Cache most language-dependent predicates
2 parents d7e560c + d66506b commit 74ae2e0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3304
-3181
lines changed

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll

Lines changed: 73 additions & 82 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll

Lines changed: 73 additions & 82 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll

Lines changed: 73 additions & 82 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll

Lines changed: 73 additions & 82 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll

Lines changed: 194 additions & 115 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplLocal.qll

Lines changed: 73 additions & 82 deletions
Large diffs are not rendered by default.

cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,6 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
526526
* This is the local flow predicate that's used as a building block in global
527527
* data flow. It may have less flow than the `localFlowStep` predicate.
528528
*/
529-
cached
530529
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
531530
// Expr -> Expr
532531
exprToExprStep_nocfg(nodeFrom.asExpr(), nodeTo.asExpr())

cpp/ql/src/semmle/code/cpp/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
4545
* local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent
4646
* different objects.
4747
*/
48+
cached
4849
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
4950
// Taint can flow through expressions that alter the value but preserve
5051
// more than one bit of it _or_ expressions that follow data through

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 122 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -165,106 +165,133 @@ private predicate nodeIsBarrierEqualityCandidate(
165165
any(IRGuardCondition guard).ensuresEq(access, _, _, node.asInstruction().getBlock(), true)
166166
}
167167

168-
private predicate nodeIsBarrier(DataFlow::Node node) {
169-
exists(Variable checkedVar |
170-
readsVariable(node.asInstruction(), checkedVar) and
171-
hasUpperBoundsCheck(checkedVar)
172-
)
173-
or
174-
exists(Variable checkedVar, Operand access |
175-
/*
176-
* This node is guarded by a condition that forces the accessed variable
177-
* to equal something else. For example:
178-
* ```
179-
* x = taintsource()
180-
* if (x == 10) {
181-
* taintsink(x); // not considered tainted
182-
* }
183-
* ```
184-
*/
168+
cached
169+
private module Cached {
170+
cached
171+
predicate nodeIsBarrier(DataFlow::Node node) {
172+
exists(Variable checkedVar |
173+
readsVariable(node.asInstruction(), checkedVar) and
174+
hasUpperBoundsCheck(checkedVar)
175+
)
176+
or
177+
exists(Variable checkedVar, Operand access |
178+
/*
179+
* This node is guarded by a condition that forces the accessed variable
180+
* to equal something else. For example:
181+
* ```
182+
* x = taintsource()
183+
* if (x == 10) {
184+
* taintsink(x); // not considered tainted
185+
* }
186+
* ```
187+
*/
188+
189+
nodeIsBarrierEqualityCandidate(node, access, checkedVar) and
190+
readsVariable(access.getDef(), checkedVar)
191+
)
192+
}
185193

186-
nodeIsBarrierEqualityCandidate(node, access, checkedVar) and
187-
readsVariable(access.getDef(), checkedVar)
188-
)
189-
}
194+
cached
195+
predicate nodeIsBarrierIn(DataFlow::Node node) {
196+
// don't use dataflow into taint sources, as this leads to duplicate results.
197+
exists(Expr source | isUserInput(source, _) |
198+
node = DataFlow::exprNode(source)
199+
or
200+
// This case goes together with the similar (but not identical) rule in
201+
// `getNodeForSource`.
202+
node = DataFlow::definitionByReferenceNodeFromArgument(source)
203+
)
204+
or
205+
// don't use dataflow into binary instructions if both operands are unpredictable
206+
exists(BinaryInstruction iTo |
207+
iTo = node.asInstruction() and
208+
not predictableInstruction(iTo.getLeft()) and
209+
not predictableInstruction(iTo.getRight()) and
210+
// propagate taint from either the pointer or the offset, regardless of predictability
211+
not iTo instanceof PointerArithmeticInstruction
212+
)
213+
or
214+
// don't use dataflow through calls to pure functions if two or more operands
215+
// are unpredictable
216+
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
217+
iTo = node.asInstruction() and
218+
isPureFunction(iTo.getStaticCallTarget().getName()) and
219+
iFrom1 = iTo.getAnArgument() and
220+
iFrom2 = iTo.getAnArgument() and
221+
not predictableInstruction(iFrom1) and
222+
not predictableInstruction(iFrom2) and
223+
iFrom1 != iFrom2
224+
)
225+
}
190226

191-
private predicate nodeIsBarrierIn(DataFlow::Node node) {
192-
// don't use dataflow into taint sources, as this leads to duplicate results.
193-
exists(Expr source | isUserInput(source, _) |
194-
node = DataFlow::exprNode(source)
227+
cached
228+
Element adjustedSink(DataFlow::Node sink) {
229+
// TODO: is it more appropriate to use asConvertedExpr here and avoid
230+
// `getConversion*`? Or will that cause us to miss some cases where there's
231+
// flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
232+
// pretend there was flow to the converted `Expr` for the sake of
233+
// compatibility.
234+
sink.asExpr().getConversion*() = result
195235
or
196-
// This case goes together with the similar (but not identical) rule in
197-
// `getNodeForSource`.
198-
node = DataFlow::definitionByReferenceNodeFromArgument(source)
199-
)
200-
or
201-
// don't use dataflow into binary instructions if both operands are unpredictable
202-
exists(BinaryInstruction iTo |
203-
iTo = node.asInstruction() and
204-
not predictableInstruction(iTo.getLeft()) and
205-
not predictableInstruction(iTo.getRight()) and
206-
// propagate taint from either the pointer or the offset, regardless of predictability
207-
not iTo instanceof PointerArithmeticInstruction
208-
)
209-
or
210-
// don't use dataflow through calls to pure functions if two or more operands
211-
// are unpredictable
212-
exists(Instruction iFrom1, Instruction iFrom2, CallInstruction iTo |
213-
iTo = node.asInstruction() and
214-
isPureFunction(iTo.getStaticCallTarget().getName()) and
215-
iFrom1 = iTo.getAnArgument() and
216-
iFrom2 = iTo.getAnArgument() and
217-
not predictableInstruction(iFrom1) and
218-
not predictableInstruction(iFrom2) and
219-
iFrom1 != iFrom2
220-
)
221-
}
236+
// For compatibility, send flow from arguments to parameters, even for
237+
// functions with no body.
238+
exists(FunctionCall call, int i |
239+
sink.asExpr() = call.getArgument(i) and
240+
result = resolveCall(call).getParameter(i)
241+
)
242+
or
243+
// For compatibility, send flow into a `Variable` if there is flow to any
244+
// Load or Store of that variable.
245+
exists(CopyInstruction copy |
246+
copy.getSourceValue() = sink.asInstruction() and
247+
(
248+
readsVariable(copy, result) or
249+
writesVariable(copy, result)
250+
) and
251+
not hasUpperBoundsCheck(result)
252+
)
253+
or
254+
// For compatibility, send flow into a `NotExpr` even if it's part of a
255+
// short-circuiting condition and thus might get skipped.
256+
result.(NotExpr).getOperand() = sink.asExpr()
257+
or
258+
// Taint postfix and prefix crement operations when their operand is tainted.
259+
result.(CrementOperation).getAnOperand() = sink.asExpr()
260+
or
261+
// Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted.
262+
result.(AssignOperation).getAnOperand() = sink.asExpr()
263+
or
264+
result =
265+
sink.asOperand()
266+
.(SideEffectOperand)
267+
.getUse()
268+
.(ReadSideEffectInstruction)
269+
.getArgumentDef()
270+
.getUnconvertedResultExpression()
271+
}
222272

223-
private Element adjustedSink(DataFlow::Node sink) {
224-
// TODO: is it more appropriate to use asConvertedExpr here and avoid
225-
// `getConversion*`? Or will that cause us to miss some cases where there's
226-
// flow to a conversion (like a `ReferenceDereferenceExpr`) and we want to
227-
// pretend there was flow to the converted `Expr` for the sake of
228-
// compatibility.
229-
sink.asExpr().getConversion*() = result
230-
or
231-
// For compatibility, send flow from arguments to parameters, even for
232-
// functions with no body.
233-
exists(FunctionCall call, int i |
234-
sink.asExpr() = call.getArgument(i) and
235-
result = resolveCall(call).getParameter(i)
236-
)
237-
or
238-
// For compatibility, send flow into a `Variable` if there is flow to any
239-
// Load or Store of that variable.
240-
exists(CopyInstruction copy |
241-
copy.getSourceValue() = sink.asInstruction() and
242-
(
243-
readsVariable(copy, result) or
244-
writesVariable(copy, result)
245-
) and
246-
not hasUpperBoundsCheck(result)
247-
)
248-
or
249-
// For compatibility, send flow into a `NotExpr` even if it's part of a
250-
// short-circuiting condition and thus might get skipped.
251-
result.(NotExpr).getOperand() = sink.asExpr()
252-
or
253-
// Taint postfix and prefix crement operations when their operand is tainted.
254-
result.(CrementOperation).getAnOperand() = sink.asExpr()
255-
or
256-
// Taint `e1 += e2`, `e &= e2` and friends when `e1` or `e2` is tainted.
257-
result.(AssignOperation).getAnOperand() = sink.asExpr()
258-
or
259-
result =
260-
sink.asOperand()
261-
.(SideEffectOperand)
262-
.getUse()
263-
.(ReadSideEffectInstruction)
264-
.getArgumentDef()
265-
.getUnconvertedResultExpression()
273+
/**
274+
* Step to return value of a modeled function when an input taints the
275+
* dereference of the return value.
276+
*/
277+
cached
278+
predicate additionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
279+
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
280+
n1.asOperand() = callInput(call, modelIn) and
281+
(
282+
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
283+
or
284+
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
285+
) and
286+
call.getStaticCallTarget() = func and
287+
modelOut.isReturnValueDeref() and
288+
call = n2.asInstruction()
289+
)
290+
}
266291
}
267292

293+
private import Cached
294+
268295
/**
269296
* Holds if `tainted` may contain taint from `source`.
270297
*
@@ -402,19 +429,7 @@ module TaintedWithPath {
402429
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
403430
)
404431
or
405-
// Step to return value of a modeled function when an input taints the
406-
// dereference of the return value
407-
exists(CallInstruction call, Function func, FunctionInput modelIn, FunctionOutput modelOut |
408-
n1.asOperand() = callInput(call, modelIn) and
409-
(
410-
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
411-
or
412-
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
413-
) and
414-
call.getStaticCallTarget() = func and
415-
modelOut.isReturnValueDeref() and
416-
call = n2.asInstruction()
417-
)
432+
additionalTaintStep(n1, n2)
418433
}
419434

420435
override predicate isSanitizer(DataFlow::Node node) {

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@ private import cpp
22
private import semmle.code.cpp.ir.IR
33
private import semmle.code.cpp.ir.dataflow.DataFlow
44
private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate
5+
private import DataFlowImplCommon as DataFlowImplCommon
56

67
/**
78
* Gets a function that might be called by `call`.
89
*/
10+
cached
911
Function viableCallable(CallInstruction call) {
12+
DataFlowImplCommon::forceCachingInSameStage() and
1013
result = call.getStaticCallTarget()
1114
or
1215
// If the target of the call does not have a body in the snapshot, it might
@@ -43,7 +46,6 @@ private module VirtualDispatch {
4346
abstract DataFlow::Node getDispatchValue();
4447

4548
/** Gets a candidate target for this call. */
46-
cached
4749
abstract Function resolve();
4850

4951
/**

0 commit comments

Comments
 (0)