Skip to content

Commit 607a1e4

Browse files
committed
Shared: Generate value-preserving summaries when possible.
1 parent f5a295c commit 607a1e4

File tree

2 files changed

+150
-49
lines changed

2 files changed

+150
-49
lines changed

shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll

Lines changed: 145 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ private import codeql.dataflow.internal.ContentDataFlowImpl
1111
private import codeql.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
1212
private import codeql.util.Location
1313
private import ModelPrinting
14+
private import codeql.util.Unit
1415

1516
/**
1617
* Provides language-specific model generator parameters.
@@ -464,14 +465,22 @@ module MakeModelGenerator<
464465
override string toString() { result = "TaintStore(" + step + ")" }
465466
}
466467

467-
/**
468-
* A data flow configuration for tracking flow through APIs.
469-
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
470-
*
471-
* This can be used to generate Flow summaries for APIs from parameter to return.
472-
*/
473-
private module PropagateFlowConfig implements DataFlow::StateConfigSig {
474-
class FlowState = TaintState;
468+
private signature module PropagateFlowConfigInputSig {
469+
class FlowState;
470+
471+
FlowState initialState();
472+
473+
default predicate isAdditionalFlowStep(
474+
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
475+
) {
476+
none()
477+
}
478+
}
479+
480+
private module PropagateFlowConfig<PropagateFlowConfigInputSig PropagateFlowConfigInput>
481+
implements DataFlow::StateConfigSig
482+
{
483+
import PropagateFlowConfigInput
475484

476485
predicate isSource(DataFlow::Node source, FlowState state) {
477486
source instanceof DataFlow::ParameterNode and
@@ -480,7 +489,7 @@ module MakeModelGenerator<
480489
c instanceof DataFlowSummaryTargetApi and
481490
not isUninterestingForHeuristicDataFlowModels(c)
482491
) and
483-
state.(TaintRead).getStep() = 0
492+
state = initialState()
484493
}
485494

486495
predicate isSink(DataFlow::Node sink, FlowState state) {
@@ -494,6 +503,31 @@ module MakeModelGenerator<
494503
not exists(captureQualifierFlow(getAsExprEnclosingCallable(sink)))
495504
}
496505

506+
predicate isAdditionalFlowStep = PropagateFlowConfigInput::isAdditionalFlowStep/4;
507+
508+
predicate isBarrier(DataFlow::Node n) {
509+
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
510+
}
511+
512+
DataFlow::FlowFeature getAFeature() {
513+
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
514+
}
515+
}
516+
517+
/**
518+
* A module used to construct a data flow configuration for tracking taint-
519+
* flow through APIs.
520+
* The sources are the parameters of an API and the sinks are the return
521+
* values (excluding `this`) and parameters.
522+
*
523+
* This can be used to generate flow summaries for APIs from parameter to
524+
* return.
525+
*/
526+
module PropagateFlowConfigInputTaintInput implements PropagateFlowConfigInputSig {
527+
class FlowState = TaintState;
528+
529+
FlowState initialState() { result.(TaintRead).getStep() = 0 }
530+
497531
predicate isAdditionalFlowStep(
498532
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
499533
) {
@@ -515,51 +549,107 @@ module MakeModelGenerator<
515549
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
516550
)
517551
}
552+
}
518553

519-
predicate isBarrier(DataFlow::Node n) {
520-
exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t))
521-
}
554+
/**
555+
* A data flow configuration for tracking taint-flow through APIs.
556+
* The sources are the parameters of an API and the sinks are the return
557+
* values (excluding `this`) and parameters.
558+
*
559+
* This can be used to generate flow summaries for APIs from parameter to
560+
* return.
561+
*/
562+
private module PropagateTaintFlowConfig =
563+
PropagateFlowConfig<PropagateFlowConfigInputTaintInput>;
522564

523-
DataFlow::FlowFeature getAFeature() {
524-
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
525-
}
565+
module PropagateTaintFlow = TaintTracking::GlobalWithState<PropagateTaintFlowConfig>;
566+
567+
/**
568+
* A module used to construct a data flow configuration for tracking
569+
* data flow through APIs.
570+
* The sources are the parameters of an API and the sinks are the return
571+
* values (excluding `this`) and parameters.
572+
*
573+
* This can be used to generate value-preserving flow summaries for APIs
574+
* from parameter to return.
575+
*/
576+
module PropagateFlowConfigInputDataFlowInput implements PropagateFlowConfigInputSig {
577+
class FlowState = Unit;
578+
579+
FlowState initialState() { any() }
526580
}
527581

528-
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
582+
/**
583+
* A data flow configuration for tracking data flow through APIs.
584+
* The sources are the parameters of an API and the sinks are the return
585+
* values (excluding `this`) and parameters.
586+
*
587+
* This can be used to generate flow summaries for APIs from parameter to
588+
* return.
589+
*/
590+
private module PropagateDataFlowConfig =
591+
PropagateFlowConfig<PropagateFlowConfigInputDataFlowInput>;
592+
593+
module PropagateDataFlow = DataFlow::GlobalWithState<PropagateDataFlowConfig>;
529594

530595
/**
531-
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
596+
* Holds if there should be a summary of `api` specifying flow from `p`
597+
* to `returnNodeExt`.
532598
*/
533-
string captureThroughFlow0(
599+
predicate captureThroughFlow0(
534600
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
535601
) {
536-
exists(string input, string output |
537-
getEnclosingCallable(p) = api and
538-
getEnclosingCallable(returnNodeExt) = api and
539-
input = parameterNodeAsInput(p) and
540-
output = getOutput(returnNodeExt) and
541-
input != output and
542-
result = ModelPrinting::asLiftedTaintModel(api, input, output)
543-
)
602+
captureThroughFlow0(api, p, _, returnNodeExt, _, _)
603+
}
604+
605+
/**
606+
* Holds if there should be a summary of `api` specifying flow
607+
* from `p` (with summary component `input`) to `returnNodeExt` (with
608+
* summary component `output`).
609+
*
610+
* `preservesValue` is true if the summary is value-preserving, or `false`
611+
* otherwise.
612+
*/
613+
private predicate captureThroughFlow0(
614+
DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, string input,
615+
ReturnNodeExt returnNodeExt, string output, boolean preservesValue
616+
) {
617+
(
618+
PropagateDataFlow::flow(p, returnNodeExt) and preservesValue = true
619+
or
620+
not PropagateDataFlow::flow(p, returnNodeExt) and
621+
PropagateTaintFlow::flow(p, returnNodeExt) and
622+
preservesValue = false
623+
) and
624+
getEnclosingCallable(p) = api and
625+
getEnclosingCallable(returnNodeExt) = api and
626+
input = parameterNodeAsInput(p) and
627+
output = getOutput(returnNodeExt) and
628+
input != output
544629
}
545630

546631
/**
547632
* Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
633+
*
634+
* `preservesValue` is `true` if the summary is value-preserving, and `false` otherwise.
548635
*/
549-
private string captureThroughFlow(DataFlowSummaryTargetApi api) {
550-
exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt |
551-
PropagateFlow::flow(p, returnNodeExt) and
552-
result = captureThroughFlow0(api, p, returnNodeExt)
636+
private string captureThroughFlow(DataFlowSummaryTargetApi api, boolean preservesValue) {
637+
exists(string input, string output |
638+
preservesValue = max(boolean b | captureThroughFlow0(api, _, input, _, output, b)) and
639+
result = ModelPrinting::asLiftedTaintModel(api, input, output, preservesValue)
553640
)
554641
}
555642

556643
/**
557644
* Gets the summary model(s) of `api`, if there is flow from parameters to the
558645
* return value or parameter or if `api` is a fluent API.
646+
*
647+
* `preservesValue` is `true` if the summary is value-preserving, and `false` otherwise.
559648
*/
560-
string captureFlow(DataFlowSummaryTargetApi api) {
561-
result = captureQualifierFlow(api) or
562-
result = captureThroughFlow(api)
649+
string captureHeuristicFlow(DataFlowSummaryTargetApi api, boolean preservesValue) {
650+
result = captureQualifierFlow(api) and preservesValue = true
651+
or
652+
result = captureThroughFlow(api, preservesValue)
563653
}
564654

565655
/**
@@ -569,7 +659,7 @@ module MakeModelGenerator<
569659
*/
570660
string captureNoFlow(DataFlowSummaryTargetApi api) {
571661
not exists(DataFlowSummaryTargetApi api0 |
572-
exists(captureFlow(api0)) and api0.lift() = api.lift()
662+
exists(captureFlow(api0, _)) and api0.lift() = api.lift()
573663
) and
574664
api.isRelevant() and
575665
result = ModelPrinting::asNeutralSummaryModel(api)
@@ -1024,12 +1114,13 @@ module MakeModelGenerator<
10241114
/**
10251115
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
10261116
* the return value or a parameter). `lift` is true, if the model should be lifted, otherwise false.
1117+
* `preservesValue` is `true` if the summary is value-preserving, and `false` otherwise.
10271118
*
10281119
* Models are lifted to the best type in case the read and store access paths do not
10291120
* contain a field or synthetic field access.
10301121
*/
1031-
string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift) {
1032-
exists(string input, string output, boolean preservesValue |
1122+
string captureFlow(ContentDataFlowSummaryTargetApi api, boolean lift, boolean preservesValue) {
1123+
exists(string input, string output |
10331124
captureFlow0(api, input, output, _, lift) and
10341125
preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and
10351126
result = ContentModelPrinting::asModel(api, input, output, preservesValue, lift)
@@ -1046,17 +1137,25 @@ module MakeModelGenerator<
10461137
* generate flow summaries using the heuristic based summary generator.
10471138
*/
10481139
string captureFlow(DataFlowSummaryTargetApi api, boolean lift) {
1049-
result = ContentSensitive::captureFlow(api, lift)
1050-
or
1051-
not exists(DataFlowSummaryTargetApi api0 |
1052-
(api0 = api or api.lift() = api0) and
1053-
exists(ContentSensitive::captureFlow(api0, false))
1140+
exists(boolean preservesValue |
1141+
result = ContentSensitive::captureFlow(api, lift, preservesValue)
10541142
or
1055-
api0.lift() = api.lift() and
1056-
exists(ContentSensitive::captureFlow(api0, true))
1057-
) and
1058-
result = Heuristic::captureFlow(api) and
1059-
lift = true
1143+
not exists(DataFlowSummaryTargetApi api0 |
1144+
// If the heuristic summary is value-preserving then we keep both
1145+
// summaries. However, if we can generate any content-sensitive
1146+
// summary (value-preserving or not) then we don't include any taint-
1147+
// based heuristic summary.
1148+
preservesValue = false
1149+
|
1150+
(api0 = api or api.lift() = api0) and
1151+
exists(ContentSensitive::captureFlow(api0, false, _))
1152+
or
1153+
api0.lift() = api.lift() and
1154+
exists(ContentSensitive::captureFlow(api0, true, _))
1155+
) and
1156+
result = Heuristic::captureHeuristicFlow(api, preservesValue) and
1157+
lift = true
1158+
)
10601159
}
10611160

10621161
/**

shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ module ModelPrintingImpl<ModelPrintingLangSig Lang> {
8686
/**
8787
* Gets the lifted taint summary model for `api` with `input` and `output`.
8888
*/
89-
bindingset[input, output]
90-
string asLiftedTaintModel(Printing::SummaryApi api, string input, string output) {
91-
result = asModel(api, input, output, false, true)
89+
bindingset[input, output, preservesValue]
90+
string asLiftedTaintModel(
91+
Printing::SummaryApi api, string input, string output, boolean preservesValue
92+
) {
93+
result = asModel(api, input, output, preservesValue, true)
9294
}
9395

9496
/**

0 commit comments

Comments
 (0)