@@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
127
127
api = returnNodeEnclosingCallable ( ret ) and
128
128
isOwnInstanceAccessNode ( ret )
129
129
) and
130
- result = Printing:: asValueModel ( api , qualifierString ( ) , "ReturnValue" )
130
+ result = Printing:: asLiftedValueModel ( api , qualifierString ( ) , "ReturnValue" )
131
131
}
132
132
133
133
/** Gets the constant `2`. NOTE(review): presumably a base limit on access path length - confirm against callers. */
private int accessPathLimit0 ( ) { result = 2 }
@@ -237,7 +237,7 @@ string captureThroughFlow0(
237
237
input = parameterNodeAsInput ( p ) and
238
238
output = getOutput ( returnNodeExt ) and
239
239
input != output and
240
- result = Printing:: asTaintModel ( api , input , output )
240
+ result = Printing:: asLiftedTaintModel ( api , input , output )
241
241
)
242
242
}
243
243
@@ -291,26 +291,269 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) {
291
291
)
292
292
}
293
293
294
/**
 * Gets the MaD string representation of the store step access path `ap`.
 *
 * The result is the concatenation (without separator) of the strings
 * `getContent(ap, idx)`, taken in ascending order of `idx`.
 */
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
  result = concat(int idx, string part | part = getContent(ap, idx) | part, "" order by idx)
}
297
300
301
/**
 * Gets the MaD string representation of the read step access path `ap`.
 *
 * The result is the concatenation (without separator) of the strings
 * `getContent(ap, idx)`, taken in descending order of `idx`, i.e. the
 * opposite order of the one used for store access paths.
 */
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
  result = concat(int idx, string part | part = getContent(ap, idx) | part, "" order by idx desc)
}
301
307
302
- string captureContentFlow ( DataFlowSummaryTargetApi api ) {
308
/**
 * Holds if the access path `ap` mentions a field, that is, if the head of
 * `ap` satisfies `isField` or the tail of `ap` (recursively) mentions a field.
 *
 * Note that `ap` must have both a head and a tail for this predicate to hold.
 */
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = ap.getHead() and
    rest = ap.getTail()
  |
    isField(first)
    or
    mentionsField(rest)
  )
}
318
+
319
/**
 * Holds if `PropagateContentFlow::flow` reports flow from `p` (reading `reads`)
 * to `returnNodeExt` (storing `stores`), and both `p` and `returnNodeExt` have
 * `api` as their enclosing callable. `preservesValue` is passed through from
 * the underlying flow relation.
 */
private predicate apiFlow(
  DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
  ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  api = p.getEnclosingCallable() and
  api = returnNodeExt.getEnclosingCallable() and
  PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue)
}
327
+
328
/**
 * An API that is relevant for modeling using content flow.
 *
 * The following heuristic is applied: content flow is only considered
 * relevant for an API if
 *
 *    #content flows <= 2 * #parameters + 3
 *
 * where the content flows are counted as distinct (input, output) string pairs.
 *
 * If an API produces more content flow than that, it is likely that
 * 1. Types are not sufficiently constrained, leading to a combinatorial
 *    explosion in dispatch and thus in the generated summaries.
 * 2. It is a reasonable approximation to use the non-content based flow
 *    detection instead, as reads and stores would use a significant
 *    part of an object's internal state.
 */
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
  ContentDataFlowSummaryTargetApi() {
    2 * this.getNumberOfParameters() + 3 >=
      count(string input, string output |
        exists(
          DataFlow::ParameterNode param, PropagateContentFlow::AccessPath readPath,
          ReturnNodeExt ret, PropagateContentFlow::AccessPath storePath
        |
          apiFlow(this, param, readPath, ret, storePath, _) and
          output = getContentOutput(ret) + printStoreAccessPath(storePath) and
          input = parameterNodeAsContentInput(param) + printReadAccessPath(readPath)
        )
      )
  }
}
354
+
355
/**
 * Holds if the content-flow relevant API `api` has flow from `p` (reading
 * `reads`) to `returnNodeExt` (storing `stores`).
 *
 * This is `apiFlow` restricted to APIs that are `ContentDataFlowSummaryTargetApi`s;
 * the restriction comes solely from the type of the `api` parameter. Delegating
 * to `apiFlow` keeps the two predicates from drifting apart.
 */
pragma[nomagic]
private predicate apiContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  apiFlow(api, p, reads, returnNodeExt, stores, preservesValue)
}
365
+
366
/**
 * Holds if any of the content sets in `path` translates into a synthetic field,
 * that is, if `getSyntheticName` has a result for the head of `path`, or the
 * tail of `path` (recursively) has synthetic content.
 *
 * Note that `path` must have both a head and a tail for this predicate to hold.
 */
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = path.getHead() and
    rest = path.getTail()
  |
    hasSyntheticContent(rest)
    or
    exists(getSyntheticName(first))
  )
}
379
+
380
/**
 * A module containing predicates for validating access paths that contain
 * content sets that translate into synthetic fields, when used for generated
 * summary models.
 *
 * Throughout the module, the store path of one step is matched against the
 * read path of the following step via `reverse()`.
 */
private module AccessPathSyntheticValidation {
  /**
   * Holds if there exists an API that has content flow from `read` (on type `t1`)
   * to `store` (on type `t2`).
   */
  private predicate step(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    exists(DataFlow::ParameterNode param, ReturnNodeExt ret |
      apiContentFlow(_, param, read, ret, store, _)
    |
      t1 = param.getType() and
      t2 = ret.getType()
    )
  }

  /**
   * Holds if there exists an API that has content flow from `read` (on type `t1`)
   * to `store` (on type `t2`), where `read` does not have synthetic content
   * and `store` does.
   *
   * Step A -> Synth.
   */
  private predicate synthPathEntry(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    step(t1, read, t2, store) and
    hasSyntheticContent(store) and
    not hasSyntheticContent(read)
  }

  /**
   * Holds if there exists an API that has content flow from `read` (on type `t1`)
   * to `store` (on type `t2`), where `read` has synthetic content
   * and `store` does not.
   *
   * Step Synth -> A.
   */
  private predicate synthPathExit(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    step(t1, read, t2, store) and
    hasSyntheticContent(read) and
    not hasSyntheticContent(store)
  }

  /**
   * Holds if `store` (on type `t2`) can be reached from `read` (on type `t1`)
   * by taking one or more steps, where both `read` and `store` have
   * synthetic content.
   *
   * Synth ->+ Synth
   */
  private predicate synthPathStepRec(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    hasSyntheticContent(store) and
    hasSyntheticContent(read) and
    (
      // Base case: a single step.
      step(t1, read, t2, store)
      or
      // Recursive case: one step to an intermediate path, then continue from its reverse.
      exists(Type viaType, PropagateContentFlow::AccessPath via |
        step(t1, read, viaType, via)
      |
        synthPathStepRec(viaType, via.reverse(), t2, store)
      )
    )
  }

  /**
   * Holds if there exists a path of steps from `read` (on type `t`) to an exit.
   *
   * read ->* Synth -> A
   */
  private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
    exists(Type viaType, PropagateContentFlow::AccessPath via |
      synthPathStepRec(t, read, viaType, via)
    |
      synthPathExit(viaType, via.reverse(), _, _)
    )
    or
    synthPathExit(t, read, _, _)
  }

  /**
   * Holds if there exists a path of steps from an entry to `store` (on type `t`).
   *
   * A -> Synth ->* store
   */
  private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
    exists(Type viaType, PropagateContentFlow::AccessPath via |
      synthPathEntry(_, _, viaType, via)
    |
      synthPathStepRec(viaType, via.reverse(), t, store)
    )
    or
    synthPathEntry(_, _, t, store)
  }

  /**
   * Holds if at least one of the access paths `read` (on type `t1`) and `store`
   * (on type `t2`) contains content that will be translated into a synthetic field
   * when being used in a MaD summary model, and if there is a range of APIs such
   * that, when chaining their flow access paths, there exist access paths `A` and
   * `B` where A ->* read -> store ->* B and where `A` and `B` do not contain
   * content that will be translated into a synthetic field.
   *
   * This is needed because we don't want to include summaries that read from or
   * store into a "dead" synthetic field.
   *
   * Example:
   * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
   * `setX`, which get and set a private field `X` on `t`.
   * This would lead to the following content flows
   * getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
   * setX : Argument[0] -> Argument[this].SyntheticField[t.X]
   * As the reads and stores are on synthetic fields we should only make summaries
   * if both of these methods exist.
   */
  pragma[nomagic]
  predicate acceptReadStore(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    // `read -> store` is itself an entry step; `store` must then reach an exit.
    synthPathEntry(t1, read, t2, store) and
    reachesSynthExit(t2, store.reverse())
    or
    // `read` is reachable from an entry; `read -> store` is then either an exit
    // step itself, or a step from which an exit can be reached.
    exists(PropagateContentFlow::AccessPath prevStore |
      prevStore.reverse() = read and
      synthEntryReaches(t1, prevStore)
    |
      synthPathExit(t1, read, t2, store)
      or
      step(t1, read, t2, store) and
      reachesSynthExit(t2, store.reverse())
    )
  }
}
507
+
508
/**
 * Holds if the API `api` has relevant flow from `read` on `p` to `store` on
 * `returnNodeExt`.
 *
 * Flow is considered relevant if either
 * 1. Neither `read` nor `store` contains a content set that translates into a
 *    synthetic field, or
 * 2. `AccessPathSyntheticValidation::acceptReadStore` accepts the pair of access
 *    paths on the types of `p` and `returnNodeExt`, i.e. the synthetic content
 *    is "live" on the relevant declaring type.
 */
private predicate apiRelevantContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath store, boolean preservesValue
) {
  apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
  (
    AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
    or
    not (hasSyntheticContent(read) or hasSyntheticContent(store))
  )
}
527
+
528
/**
 * Holds if `api` has relevant content flow whose MaD input string is `input`
 * and whose MaD output string is `output`, where `input` and `output` differ.
 *
 * `preservesValue` is taken from the underlying flow, and `lift` is `false`
 * if the read or the store access path mentions a field, and `true` otherwise.
 */
pragma[nomagic]
private predicate captureContentFlow0(
  ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
  boolean lift
) {
  exists(
    DataFlow::ParameterNode param, ReturnNodeExt ret, PropagateContentFlow::AccessPath readPath,
    PropagateContentFlow::AccessPath storePath
  |
    apiRelevantContentFlow(api, param, readPath, ret, storePath, preservesValue) and
    output = getContentOutput(ret) + printStoreAccessPath(storePath) and
    input = parameterNodeAsContentInput(param) + printReadAccessPath(readPath)
  |
    input != output and
    (
      if not mentionsField(readPath) and not mentionsField(storePath)
      then lift = true
      else lift = false
    )
  )
}
544
+
545
/**
 * Gets the content based summary model(s) of the API `api` (if there is flow
 * from a parameter to the return value or a parameter).
 *
 * Models are lifted to the best type in case the read and store access paths
 * do not contain a field or synthetic field access.
 *
 * For each (input, output, lift) combination a single model is produced, using
 * the maximum `preservesValue` among the matching flows.
 */
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
  exists(string input, string output, boolean lift |
    captureContentFlow0(api, input, output, _, lift)
  |
    result =
      Printing::asModel(api, input, output,
        max(boolean v | captureContentFlow0(api, input, output, v, lift)), lift)
  )
}
316
559
0 commit comments