@@ -74,10 +74,6 @@ private class DefaultTaintTrackingCfg extends TaintTracking::Configuration {
74
74
75
75
override predicate isSink ( DataFlow:: Node sink ) { exists ( adjustedSink ( sink ) ) }
76
76
77
- override predicate isAdditionalTaintStep ( DataFlow:: Node n1 , DataFlow:: Node n2 ) {
78
- commonTaintStep ( n1 , n2 )
79
- }
80
-
81
77
override predicate isSanitizer ( DataFlow:: Node node ) { nodeIsBarrier ( node ) }
82
78
83
79
override predicate isSanitizerIn ( DataFlow:: Node node ) { nodeIsBarrierIn ( node ) }
@@ -93,8 +89,6 @@ private class ToGlobalVarTaintTrackingCfg extends TaintTracking::Configuration {
93
89
}
94
90
95
91
override predicate isAdditionalTaintStep ( DataFlow:: Node n1 , DataFlow:: Node n2 ) {
96
- commonTaintStep ( n1 , n2 )
97
- or
98
92
writesVariable ( n1 .asInstruction ( ) , n2 .asVariable ( ) .( GlobalOrNamespaceVariable ) )
99
93
or
100
94
readsVariable ( n2 .asInstruction ( ) , n1 .asVariable ( ) .( GlobalOrNamespaceVariable ) )
@@ -117,8 +111,6 @@ private class FromGlobalVarTaintTrackingCfg extends TaintTracking2::Configuratio
117
111
override predicate isSink ( DataFlow:: Node sink ) { exists ( adjustedSink ( sink ) ) }
118
112
119
113
override predicate isAdditionalTaintStep ( DataFlow:: Node n1 , DataFlow:: Node n2 ) {
120
- commonTaintStep ( n1 , n2 )
121
- or
122
114
// Additional step for flow out of variables. There is no flow _into_
123
115
// variables in this configuration, so this step only serves to take flow
124
116
// out of a variable that's a source.
@@ -227,207 +219,6 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
227
219
)
228
220
}
229
221
230
- cached
231
- private predicate commonTaintStep ( DataFlow:: Node fromNode , DataFlow:: Node toNode ) {
232
- operandToInstructionTaintStep ( fromNode .asOperand ( ) , toNode .asInstruction ( ) )
233
- or
234
- instructionToOperandTaintStep ( fromNode .asInstruction ( ) , toNode .asOperand ( ) )
235
- }
236
-
237
- private predicate instructionToOperandTaintStep ( Instruction fromInstr , Operand toOperand ) {
238
- // Propagate flow from the definition of an operand to the operand, even when the overlap is inexact.
239
- // We only do this in certain cases:
240
- // 1. The instruction's result must not be conflated, and
241
- // 2. The instruction's result type is one the types where we expect element-to-object flow. Currently
242
- // this is array types and union types. This matches the other two cases of element-to-object flow in
243
- // `DefaultTaintTracking`.
244
- toOperand .getAnyDef ( ) = fromInstr and
245
- not fromInstr .isResultConflated ( ) and
246
- (
247
- fromInstr .getResultType ( ) instanceof ArrayType or
248
- fromInstr .getResultType ( ) instanceof Union
249
- )
250
- or
251
- exists ( ReadSideEffectInstruction readInstr |
252
- fromInstr = readInstr .getArgumentDef ( ) and
253
- toOperand = readInstr .getSideEffectOperand ( )
254
- )
255
- }
256
-
257
- private predicate operandToInstructionTaintStep ( Operand fromOperand , Instruction toInstr ) {
258
- // Expressions computed from tainted data are also tainted
259
- exists ( CallInstruction call , int argIndex | call = toInstr |
260
- isPureFunction ( call .getStaticCallTarget ( ) .getName ( ) ) and
261
- fromOperand = getACallArgumentOrIndirection ( call , argIndex ) and
262
- forall ( Operand argOperand | argOperand = call .getAnArgumentOperand ( ) |
263
- argOperand = getACallArgumentOrIndirection ( call , argIndex ) or
264
- predictableInstruction ( argOperand .getAnyDef ( ) )
265
- ) and
266
- // flow through `strlen` tends to cause dubious results, if the length is
267
- // bounded.
268
- not call .getStaticCallTarget ( ) .getName ( ) = "strlen"
269
- )
270
- or
271
- // Flow from argument to return value
272
- toInstr =
273
- any ( CallInstruction call |
274
- exists ( int indexIn |
275
- modelTaintToReturnValue ( call .getStaticCallTarget ( ) , indexIn ) and
276
- fromOperand = getACallArgumentOrIndirection ( call , indexIn ) and
277
- not predictableOnlyFlow ( call .getStaticCallTarget ( ) .getName ( ) )
278
- )
279
- )
280
- or
281
- // Flow from input argument to output argument
282
- // TODO: This won't work in practice as long as all aliased memory is tracked
283
- // together in a single virtual variable.
284
- // TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
285
- // is a pointer addition expression?
286
- toInstr =
287
- any ( WriteSideEffectInstruction outInstr |
288
- exists ( CallInstruction call , int indexIn , int indexOut |
289
- modelTaintToParameter ( call .getStaticCallTarget ( ) , indexIn , indexOut ) and
290
- fromOperand = getACallArgumentOrIndirection ( call , indexIn ) and
291
- outInstr .getIndex ( ) = indexOut and
292
- outInstr .getPrimaryInstruction ( ) = call
293
- )
294
- )
295
- or
296
- // Flow through pointer dereference
297
- toInstr .( LoadInstruction ) .getSourceAddressOperand ( ) = fromOperand
298
- or
299
- // Flow through partial reads of arrays and unions
300
- toInstr .( LoadInstruction ) .getSourceValueOperand ( ) = fromOperand and
301
- exists ( Instruction fromInstr | fromInstr = fromOperand .getAnyDef ( ) |
302
- not fromInstr .isResultConflated ( ) and
303
- (
304
- fromInstr .getResultType ( ) instanceof ArrayType or
305
- fromInstr .getResultType ( ) instanceof Union
306
- )
307
- )
308
- or
309
- // Unary instructions tend to preserve enough information in practice that we
310
- // want taint to flow through.
311
- // The exception is `FieldAddressInstruction`. Together with the rule for
312
- // `LoadInstruction` above and for `ChiInstruction` below, flow through
313
- // `FieldAddressInstruction` could cause flow into one field to come out an
314
- // unrelated field. This would happen across function boundaries, where the IR
315
- // would not be able to match loads to stores.
316
- toInstr .( UnaryInstruction ) .getUnaryOperand ( ) = fromOperand and
317
- (
318
- not toInstr instanceof FieldAddressInstruction
319
- or
320
- toInstr .( FieldAddressInstruction ) .getField ( ) .getDeclaringType ( ) instanceof Union
321
- )
322
- or
323
- // Flow from an element to an array or union that contains it.
324
- toInstr .( ChiInstruction ) .getPartialOperand ( ) = fromOperand and
325
- not toInstr .isResultConflated ( ) and
326
- exists ( Type t | toInstr .getResultLanguageType ( ) .hasType ( t , false ) |
327
- t instanceof Union
328
- or
329
- t instanceof ArrayType
330
- )
331
- or
332
- exists ( BinaryInstruction bin |
333
- bin = toInstr and
334
- predictableInstruction ( toInstr .getAnOperand ( ) .getDef ( ) ) and
335
- fromOperand = toInstr .getAnOperand ( )
336
- )
337
- or
338
- // This is part of the translation of `a[i]`, where we want taint to flow
339
- // from `a`.
340
- toInstr .( PointerAddInstruction ) .getLeftOperand ( ) = fromOperand
341
- or
342
- // Until we have flow through indirections across calls, we'll take flow out
343
- // of the indirection and into the argument.
344
- // When we get proper flow through indirections across calls, this code can be
345
- // moved to `adjusedSink` or possibly into the `DataFlow::ExprNode` class.
346
- exists ( ReadSideEffectInstruction read |
347
- read .getSideEffectOperand ( ) = fromOperand and
348
- read .getArgumentDef ( ) = toInstr
349
- )
350
- or
351
- // Until we have from through indirections across calls, we'll take flow out
352
- // of the parameter and into its indirection.
353
- // `InitializeIndirectionInstruction` only has a single operand: the address of the
354
- // value whose indirection we are initializing. When initializing an indirection of a parameter `p`,
355
- // the IR looks like this:
356
- // ```
357
- // m1 = InitializeParameter[p] : &r1
358
- // r2 = Load[p] : r2, m1
359
- // m3 = InitializeIndirection[p] : &r2
360
- // ```
361
- // So by having flow from `r2` to `m3` we're enabling flow from `m1` to `m3`. This relies on the
362
- // `LoadOperand`'s overlap being exact.
363
- toInstr .( InitializeIndirectionInstruction ) .getAnOperand ( ) = fromOperand
364
- }
365
-
366
- /**
367
- * Returns the index of the side effect instruction corresponding to the specified function output,
368
- * if one exists.
369
- */
370
- private int getWriteSideEffectIndex ( FunctionOutput output ) {
371
- output .isParameterDeref ( result )
372
- or
373
- output .isQualifierObject ( ) and result = - 1
374
- }
375
-
376
- /**
377
- * Get an operand that goes into argument `argumentIndex` of `call`. This
378
- * can be either directly or through one pointer indirection.
379
- */
380
- private Operand getACallArgumentOrIndirection ( CallInstruction call , int argumentIndex ) {
381
- result = call .getPositionalArgumentOperand ( argumentIndex )
382
- or
383
- exists ( ReadSideEffectInstruction readSE |
384
- // TODO: why are read side effect operands imprecise?
385
- result = readSE .getSideEffectOperand ( ) and
386
- readSE .getPrimaryInstruction ( ) = call and
387
- readSE .getIndex ( ) = argumentIndex
388
- )
389
- }
390
-
391
- private predicate modelTaintToParameter ( Function f , int parameterIn , int parameterOut ) {
392
- exists ( FunctionInput modelIn , FunctionOutput modelOut |
393
- (
394
- f .( DataFlowFunction ) .hasDataFlow ( modelIn , modelOut )
395
- or
396
- f .( TaintFunction ) .hasTaintFlow ( modelIn , modelOut )
397
- ) and
398
- ( modelIn .isParameter ( parameterIn ) or modelIn .isParameterDeref ( parameterIn ) ) and
399
- parameterOut = getWriteSideEffectIndex ( modelOut )
400
- )
401
- }
402
-
403
- private predicate modelTaintToReturnValue ( Function f , int parameterIn ) {
404
- // Taint flow from parameter to return value
405
- exists ( FunctionInput modelIn , FunctionOutput modelOut |
406
- f .( TaintFunction ) .hasTaintFlow ( modelIn , modelOut ) and
407
- ( modelIn .isParameter ( parameterIn ) or modelIn .isParameterDeref ( parameterIn ) ) and
408
- ( modelOut .isReturnValue ( ) or modelOut .isReturnValueDeref ( ) )
409
- )
410
- or
411
- // Data flow (not taint flow) to where the return value points. For the time
412
- // being we will conflate pointers and objects in taint tracking.
413
- exists ( FunctionInput modelIn , FunctionOutput modelOut |
414
- f .( DataFlowFunction ) .hasDataFlow ( modelIn , modelOut ) and
415
- ( modelIn .isParameter ( parameterIn ) or modelIn .isParameterDeref ( parameterIn ) ) and
416
- modelOut .isReturnValueDeref ( )
417
- )
418
- or
419
- // Taint flow from one argument to another and data flow from an argument to a
420
- // return value. This happens in functions like `strcat` and `memcpy`. We
421
- // could model this flow in two separate steps, but that would add reverse
422
- // flow from the write side-effect to the call instruction, which may not be
423
- // desirable.
424
- exists ( int parameterMid , InParameter modelMid , OutReturnValue returnOut |
425
- modelTaintToParameter ( f , parameterIn , parameterMid ) and
426
- modelMid .isParameter ( parameterMid ) and
427
- f .( DataFlowFunction ) .hasDataFlow ( modelMid , returnOut )
428
- )
429
- }
430
-
431
222
private Element adjustedSink ( DataFlow:: Node sink ) {
432
223
// TODO: is it more appropriate to use asConvertedExpr here and avoid
433
224
// `getConversion*`? Or will that cause us to miss some cases where there's
0 commit comments