Skip to content

Commit 3914a93

Browse files
committed
C++: Remove commonTaintStep from DefaultTaintTracking.
1 parent 43fbcc1 commit 3914a93

File tree

1 file changed

+0
-209
lines changed

1 file changed

+0
-209
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 0 additions & 209 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,6 @@ private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
7474

7575
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
7676

77-
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
78-
commonTaintStep(n1, n2)
79-
}
80-
8177
override predicate isSanitizer(DataFlow::Node node) { nodeIsBarrier(node) }
8278

8379
override predicate isSanitizerIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
@@ -93,8 +89,6 @@ private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
9389
}
9490

9591
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
96-
commonTaintStep(n1, n2)
97-
or
9892
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
9993
or
10094
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
@@ -117,8 +111,6 @@ private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuratio
117111
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
118112

119113
override predicate isAdditionalTaintStep(DataFlow::Node n1, DataFlow::Node n2) {
120-
commonTaintStep(n1, n2)
121-
or
122114
// Additional step for flow out of variables. There is no flow _into_
123115
// variables in this configuration, so this step only serves to take flow
124116
// out of a variable that's a source.
@@ -227,207 +219,6 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
227219
)
228220
}
229221

230-
cached
231-
private predicate commonTaintStep(DataFlow::Node fromNode, DataFlow::Node toNode) {
232-
operandToInstructionTaintStep(fromNode.asOperand(), toNode.asInstruction())
233-
or
234-
instructionToOperandTaintStep(fromNode.asInstruction(), toNode.asOperand())
235-
}
236-
237-
private predicate instructionToOperandTaintStep(Instruction fromInstr, Operand toOperand) {
238-
// Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
239-
// We only do this in certain cases:
240-
// 1. The instruction's result must not be conflated, and
241-
// 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
242-
// this is array types and union types. This matches the other two cases of element-to-object flow in
243-
// `DefaultTaintTracking`.
244-
toOperand.getAnyDef() = fromInstr and
245-
not fromInstr.isResultConflated() and
246-
(
247-
fromInstr.getResultType() instanceof ArrayType or
248-
fromInstr.getResultType() instanceof Union
249-
)
250-
or
251-
exists(ReadSideEffectInstruction readInstr |
252-
fromInstr = readInstr.getArgumentDef() and
253-
toOperand = readInstr.getSideEffectOperand()
254-
)
255-
}
256-
257-
private predicate operandToInstructionTaintStep(Operand fromOperand, Instruction toInstr) {
258-
// Expressions computed from tainted data are also tainted
259-
exists(CallInstruction call, int argIndex | call = toInstr |
260-
isPureFunction(call.getStaticCallTarget().getName()) and
261-
fromOperand = getACallArgumentOrIndirection(call, argIndex) and
262-
forall(Operand argOperand | argOperand = call.getAnArgumentOperand() |
263-
argOperand = getACallArgumentOrIndirection(call, argIndex) or
264-
predictableInstruction(argOperand.getAnyDef())
265-
) and
266-
// flow through `strlen` tends to cause dubious results, if the length is
267-
// bounded.
268-
not call.getStaticCallTarget().getName() = "strlen"
269-
)
270-
or
271-
// Flow from argument to return value
272-
toInstr =
273-
any(CallInstruction call |
274-
exists(int indexIn |
275-
modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and
276-
fromOperand = getACallArgumentOrIndirection(call, indexIn) and
277-
not predictableOnlyFlow(call.getStaticCallTarget().getName())
278-
)
279-
)
280-
or
281-
// Flow from input argument to output argument
282-
// TODO: This won't work in practice as long as all aliased memory is tracked
283-
// together in a single virtual variable.
284-
// TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
285-
// is a pointer addition expression?
286-
toInstr =
287-
any(WriteSideEffectInstruction outInstr |
288-
exists(CallInstruction call, int indexIn, int indexOut |
289-
modelTaintToParameter(call.getStaticCallTarget(), indexIn, indexOut) and
290-
fromOperand = getACallArgumentOrIndirection(call, indexIn) and
291-
outInstr.getIndex() = indexOut and
292-
outInstr.getPrimaryInstruction() = call
293-
)
294-
)
295-
or
296-
// Flow through pointer dereference
297-
toInstr.(LoadInstruction).getSourceAddressOperand() = fromOperand
298-
or
299-
// Flow through partial reads of arrays and unions
300-
toInstr.(LoadInstruction).getSourceValueOperand() = fromOperand and
301-
exists(Instruction fromInstr | fromInstr = fromOperand.getAnyDef() |
302-
not fromInstr.isResultConflated() and
303-
(
304-
fromInstr.getResultType() instanceof ArrayType or
305-
fromInstr.getResultType() instanceof Union
306-
)
307-
)
308-
or
309-
// Unary instructions tend to preserve enough information in practice that we
310-
// want taint to flow through.
311-
// The exception is `FieldAddressInstruction`. Together with the rule for
312-
// `LoadInstruction` above and for `ChiInstruction` below, flow through
313-
// `FieldAddressInstruction` could cause flow into one field to come out an
314-
// unrelated field. This would happen across function boundaries, where the IR
315-
// would not be able to match loads to stores.
316-
toInstr.(UnaryInstruction).getUnaryOperand() = fromOperand and
317-
(
318-
not toInstr instanceof FieldAddressInstruction
319-
or
320-
toInstr.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union
321-
)
322-
or
323-
// Flow from an element to an array or union that contains it.
324-
toInstr.(ChiInstruction).getPartialOperand() = fromOperand and
325-
not toInstr.isResultConflated() and
326-
exists(Type t | toInstr.getResultLanguageType().hasType(t, false) |
327-
t instanceof Union
328-
or
329-
t instanceof ArrayType
330-
)
331-
or
332-
exists(BinaryInstruction bin |
333-
bin = toInstr and
334-
predictableInstruction(toInstr.getAnOperand().getDef()) and
335-
fromOperand = toInstr.getAnOperand()
336-
)
337-
or
338-
// This is part of the translation of `a[i]`, where we want taint to flow
339-
// from `a`.
340-
toInstr.(PointerAddInstruction).getLeftOperand() = fromOperand
341-
or
342-
// Until we have flow through indirections across calls, we'll take flow out
343-
// of the indirection and into the argument.
344-
// When we get proper flow through indirections across calls, this code can be
345-
// moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
346-
exists(ReadSideEffectInstruction read |
347-
read.getSideEffectOperand() = fromOperand and
348-
read.getArgumentDef() = toInstr
349-
)
350-
or
351-
// Until we have from through indirections across calls, we'll take flow out
352-
// of the parameter and into its indirection.
353-
// `InitializeIndirectionInstruction` only has a single operand: the address of the
354-
// value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
355-
// the IR looks like this:
356-
// ```
357-
// m1 = InitializeParameter[p] : &r1
358-
// r2 = Load[p] : r2, m1
359-
// m3 = InitializeIndirection[p] : &r2
360-
// ```
361-
// So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
362-
// `LoadOperand`'s overlap being exact.
363-
toInstr.(InitializeIndirectionInstruction).getAnOperand() = fromOperand
364-
}
365-
366-
/**
367-
* Returns the index of the side effect instruction corresponding to the specified function output,
368-
* if one exists.
369-
*/
370-
private int getWriteSideEffectIndex(FunctionOutput output) {
371-
output.isParameterDeref(result)
372-
or
373-
output.isQualifierObject() and result = -1
374-
}
375-
376-
/**
377-
* Get an operand that goes into argument `argumentIndex` of `call`. This
378-
* can be either directly or through one pointer indirection.
379-
*/
380-
private Operand getACallArgumentOrIndirection(CallInstruction call, int argumentIndex) {
381-
result = call.getPositionalArgumentOperand(argumentIndex)
382-
or
383-
exists(ReadSideEffectInstruction readSE |
384-
// TODO: why are read side effect operands imprecise?
385-
result = readSE.getSideEffectOperand() and
386-
readSE.getPrimaryInstruction() = call and
387-
readSE.getIndex() = argumentIndex
388-
)
389-
}
390-
391-
private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
392-
exists(FunctionInput modelIn, FunctionOutput modelOut |
393-
(
394-
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
395-
or
396-
f.(TaintFunction).hasTaintFlow(modelIn, modelOut)
397-
) and
398-
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
399-
parameterOut = getWriteSideEffectIndex(modelOut)
400-
)
401-
}
402-
403-
private predicate modelTaintToReturnValue(Function f, int parameterIn) {
404-
// Taint flow from parameter to return value
405-
exists(FunctionInput modelIn, FunctionOutput modelOut |
406-
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
407-
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
408-
(modelOut.isReturnValue() or modelOut.isReturnValueDeref())
409-
)
410-
or
411-
// Data flow (not taint flow) to where the return value points. For the time
412-
// being we will conflate pointers and objects in taint tracking.
413-
exists(FunctionInput modelIn, FunctionOutput modelOut |
414-
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
415-
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
416-
modelOut.isReturnValueDeref()
417-
)
418-
or
419-
// Taint flow from one argument to another and data flow from an argument to a
420-
// return value. This happens in functions like `strcat` and `memcpy`. We
421-
// could model this flow in two separate steps, but that would add reverse
422-
// flow from the write side-effect to the call instruction, which may not be
423-
// desirable.
424-
exists(int parameterMid, InParameter modelMid, OutReturnValue returnOut |
425-
modelTaintToParameter(f, parameterIn, parameterMid) and
426-
modelMid.isParameter(parameterMid) and
427-
f.(DataFlowFunction).hasDataFlow(modelMid, returnOut)
428-
)
429-
}
430-
431222
private Element adjustedSink(DataFlow::Node sink) {
432223
// TODO: is it more appropriate to use asConvertedExpr here and avoid
433224
// `getConversion*`? Or will that cause us to miss some cases where there's

0 commit comments

Comments
 (0)