Skip to content

Commit 5458c02

Browse files
authored
Merge pull request github#5456 from aschackmull/java/adopt-flow-summary
Java: Use shared flow summary library for CSV models.
2 parents 33db0c1 + 80eb0a2 commit 5458c02

File tree

30 files changed

+480
-178
lines changed

30 files changed

+480
-178
lines changed

java/ql/src/semmle/code/java/dataflow/ExternalFlow.qll

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import java
6868
private import semmle.code.java.dataflow.DataFlow::DataFlow
6969
private import internal.DataFlowPrivate
70+
private import FlowSummary
7071

7172
/**
7273
* A module importing the frameworks that provide external flow data,
@@ -459,7 +460,8 @@ module CsvValidation {
459460
summaryModel(_, _, _, _, _, _, input, _, _) and pred = "summary"
460461
|
461462
specSplit(input, part, _) and
462-
not part.regexpMatch("|Argument|ReturnValue") and
463+
not part.regexpMatch("|ReturnValue|ArrayElement|Element|MapKey|MapValue") and
464+
not (part = "Argument" and pred = "sink") and
463465
not parseArg(part, _) and
464466
msg = "Unrecognized input specification \"" + part + "\" in " + pred + " model."
465467
)
@@ -470,7 +472,8 @@ module CsvValidation {
470472
summaryModel(_, _, _, _, _, _, _, output, _) and pred = "summary"
471473
|
472474
specSplit(output, part, _) and
473-
not part.regexpMatch("|Argument|Parameter|ReturnValue") and
475+
not part.regexpMatch("|ReturnValue|ArrayElement|Element|MapKey|MapValue") and
476+
not (part = ["Argument", "Parameter"] and pred = "source") and
474477
not parseArg(part, _) and
475478
not parseParam(part, _) and
476479
msg = "Unrecognized output specification \"" + part + "\" in " + pred + " model."
@@ -675,6 +678,75 @@ private predicate summaryElementRef(Top ref, string input, string output, string
675678
)
676679
}
677680

681+
private SummaryComponent interpretComponent(string c) {
682+
specSplit(_, c, _) and
683+
(
684+
exists(int pos | parseArg(c, pos) and result = SummaryComponent::argument(pos))
685+
or
686+
exists(int pos | parseParam(c, pos) and result = SummaryComponent::parameter(pos))
687+
or
688+
c = "ReturnValue" and result = SummaryComponent::return()
689+
or
690+
c = "ArrayElement" and result = SummaryComponent::content(any(ArrayContent c0))
691+
or
692+
c = "Element" and result = SummaryComponent::content(any(CollectionContent c0))
693+
or
694+
c = "MapKey" and result = SummaryComponent::content(any(MapKeyContent c0))
695+
or
696+
c = "MapValue" and result = SummaryComponent::content(any(MapValueContent c0))
697+
)
698+
}
699+
700+
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
701+
exists(string c |
702+
summaryElement(_, spec, _, _) or
703+
summaryElement(_, _, spec, _)
704+
|
705+
len(spec, idx + 1) and
706+
specSplit(spec, c, idx) and
707+
stack = SummaryComponentStack::singleton(interpretComponent(c))
708+
)
709+
or
710+
exists(SummaryComponent head, SummaryComponentStack tail |
711+
interpretSpec(spec, idx, head, tail) and
712+
stack = SummaryComponentStack::push(head, tail)
713+
)
714+
}
715+
716+
private predicate interpretSpec(
717+
string output, int idx, SummaryComponent head, SummaryComponentStack tail
718+
) {
719+
exists(string c |
720+
interpretSpec(output, idx + 1, tail) and
721+
specSplit(output, c, idx) and
722+
head = interpretComponent(c)
723+
)
724+
}
725+
726+
private class MkStack extends RequiredSummaryComponentStack {
727+
MkStack() { interpretSpec(_, _, _, this) }
728+
729+
override predicate required(SummaryComponent c) { interpretSpec(_, _, c, this) }
730+
}
731+
732+
private class SummarizedCallableExternal extends SummarizedCallable {
733+
SummarizedCallableExternal() { summaryElement(this, _, _, _) }
734+
735+
override predicate propagatesFlow(
736+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
737+
) {
738+
exists(string inSpec, string outSpec, string kind |
739+
summaryElement(this, inSpec, outSpec, kind) and
740+
interpretSpec(inSpec, 0, input) and
741+
interpretSpec(outSpec, 0, output)
742+
|
743+
kind = "value" and preservesValue = true
744+
or
745+
kind = "taint" and preservesValue = false
746+
)
747+
}
748+
}
749+
678750
private newtype TAstOrNode =
679751
TAst(Top t) or
680752
TNode(Node n)
@@ -761,15 +833,3 @@ predicate sinkNode(Node node, string kind) {
761833
interpretInput(input, 0, ref, TNode(node))
762834
)
763835
}
764-
765-
/**
766-
* Holds if `node1` to `node2` is specified as a flow step with the given kind
767-
* in a CSV flow model.
768-
*/
769-
predicate summaryStep(Node node1, Node node2, string kind) {
770-
exists(Top ref, string input, string output |
771-
summaryElementRef(ref, input, output, kind) and
772-
interpretInput(input, 0, ref, TNode(node1)) and
773-
interpretOutput(output, 0, ref, TNode(node2))
774-
)
775-
}

java/ql/src/semmle/code/java/dataflow/FlowSummary.qll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ private import internal.FlowSummaryImpl as Impl
77
private import internal.DataFlowDispatch
88
private import internal.DataFlowPrivate
99

10-
// import all instances below
11-
private module Summaries { }
10+
// import all instances of SummarizedCallable below
11+
private module Summaries {
12+
private import semmle.code.java.dataflow.ExternalFlow
13+
}
1214

1315
class SummaryComponent = Impl::Public::SummaryComponent;
1416

java/ql/src/semmle/code/java/dataflow/internal/DataFlowPrivate.qll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,10 @@ private predicate instanceFieldAssign(Expr src, FieldAccess fa) {
8484

8585
private newtype TContent =
8686
TFieldContent(InstanceField f) or
87+
TArrayContent() or
8788
TCollectionContent() or
88-
TArrayContent()
89+
TMapKeyContent() or
90+
TMapValueContent()
8991

9092
/**
9193
* A reference contained in an object. Examples include instance fields, the
@@ -114,12 +116,20 @@ class FieldContent extends Content, TFieldContent {
114116
}
115117
}
116118

119+
class ArrayContent extends Content, TArrayContent {
120+
override string toString() { result = "[]" }
121+
}
122+
117123
class CollectionContent extends Content, TCollectionContent {
118-
override string toString() { result = "collection" }
124+
override string toString() { result = "<element>" }
119125
}
120126

121-
class ArrayContent extends Content, TArrayContent {
122-
override string toString() { result = "array" }
127+
class MapKeyContent extends Content, TMapKeyContent {
128+
override string toString() { result = "<map.key>" }
129+
}
130+
131+
class MapValueContent extends Content, TMapValueContent {
132+
override string toString() { result = "<map.value>" }
123133
}
124134

125135
/**

java/ql/src/semmle/code/java/dataflow/internal/DataFlowUtil.qll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,6 @@ predicate simpleLocalFlowStep(Node node1, Node node2) {
142142
or
143143
node2.asExpr().(AssignExpr).getSource() = node1.asExpr()
144144
or
145-
summaryStep(node1, node2, "value")
146-
or
147145
exists(MethodAccess ma, ValuePreservingMethod m, int argNo |
148146
ma.getCallee().getSourceDeclaration() = m and m.returnsValue(argNo)
149147
|
@@ -152,6 +150,14 @@ predicate simpleLocalFlowStep(Node node1, Node node2) {
152150
)
153151
or
154152
FlowSummaryImpl::Private::Steps::summaryLocalStep(node1, node2, true)
153+
or
154+
// If flow through a method updates a parameter from some input A, and that
155+
// parameter also is returned through B, then we'd like a combined flow from A
156+
// to B as well. As an example, this simplifies modeling of fluent methods:
157+
// for `StringBuilder.append(x)` with a specified value flow from qualifier to
158+
// return value and taint flow from argument 0 to the qualifier, then this
159+
// allows us to infer taint flow from argument 0 to the return value.
160+
node1.(SummaryNode).(PostUpdateNode).getPreUpdateNode().(ParameterNode) = node2
155161
}
156162

157163
/**

java/ql/src/semmle/code/java/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -46,47 +46,19 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) {
4646
* different objects.
4747
*/
4848
predicate localAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
49-
localAdditionalBasicTaintStep(src, sink)
50-
or
51-
composedValueAndTaintModelStep(src, sink)
52-
}
53-
54-
private predicate localAdditionalBasicTaintStep(DataFlow::Node src, DataFlow::Node sink) {
5549
localAdditionalTaintExprStep(src.asExpr(), sink.asExpr())
5650
or
5751
localAdditionalTaintUpdateStep(src.asExpr(),
5852
sink.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr())
5953
or
60-
summaryStep(src, sink, "taint") and
61-
not summaryStep(src, sink, "value")
62-
or
6354
exists(Argument arg |
6455
src.asExpr() = arg and
6556
arg.isVararg() and
6657
sink.(DataFlow::ImplicitVarargsArray).getCall() = arg.getCall()
6758
)
6859
or
69-
FlowSummaryImpl::Private::Steps::summaryLocalStep(src, sink, false)
70-
}
71-
72-
/**
73-
* Holds if an additional step from `src` to `sink` through a call can be inferred from the
74-
* combination of a value-preserving step providing an alias between an input and the output
75-
* and a taint step from `src` to one the aliased nodes. For example, if we know that `f(a, b)` returns
76-
* the exact value of `a` and also propagates taint from `b` to `a`, then we also know that
77-
* the return value is tainted after `f` completes.
78-
*/
79-
private predicate composedValueAndTaintModelStep(ArgumentNode src, DataFlow::Node sink) {
80-
exists(Call call, ArgumentNode valueSource, DataFlow::PostUpdateNode valueSourcePost |
81-
src.argumentOf(call, _) and
82-
valueSource.argumentOf(call, _) and
83-
src != valueSource and
84-
valueSourcePost.getPreUpdateNode() = valueSource and
85-
// in-x -value-> out-y and in-z -taint-> in-x ==> in-z -taint-> out-y
86-
localAdditionalBasicTaintStep(src, valueSourcePost) and
87-
DataFlow::localFlowStep(valueSource, DataFlow::exprNode(call)) and
88-
sink = DataFlow::exprNode(call)
89-
)
60+
FlowSummaryImpl::Private::Steps::summaryLocalStep(src, sink, false) and
61+
not FlowSummaryImpl::Private::Steps::summaryLocalStep(src, sink, true)
9062
}
9163

9264
/**

java/ql/src/semmle/code/java/frameworks/ApacheHttp.qll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,8 @@ private class ApacheHttpFlowStep extends SummaryModelCsv {
149149
"org.apache.hc.core5.http.message;RequestLine;true;getMethod;();;Argument[-1];ReturnValue;taint",
150150
"org.apache.hc.core5.http.message;RequestLine;true;getUri;();;Argument[-1];ReturnValue;taint",
151151
"org.apache.hc.core5.http.message;RequestLine;true;toString;();;Argument[-1];ReturnValue;taint",
152-
"org.apache.hc.core5.http.message;RequestLine;true;RequestLine;(HttpRequest);;Argument[0];ReturnValue;taint",
153-
"org.apache.hc.core5.http.message;RequestLine;true;RequestLine;(String,String,ProtocolVersion);;Argument[1];ReturnValue;taint",
154-
"org.apache.hc.core5.http.message;RequestLine;true;RequestLine;(String,String,ProtocolVersion);;Argument[1];ReturnValue;taint",
152+
"org.apache.hc.core5.http.message;RequestLine;true;RequestLine;(HttpRequest);;Argument[0];Argument[-1];taint",
153+
"org.apache.hc.core5.http.message;RequestLine;true;RequestLine;(String,String,ProtocolVersion);;Argument[1];Argument[-1];taint",
155154
"org.apache.hc.core5.function;Supplier;true;get;();;Argument[-1];ReturnValue;taint",
156155
"org.apache.hc.core5.net;URIAuthority;true;getHostName;();;Argument[-1];ReturnValue;taint",
157156
"org.apache.hc.core5.net;URIAuthority;true;toString;();;Argument[-1];ReturnValue;taint",
@@ -181,16 +180,16 @@ private class ApacheHttpFlowStep extends SummaryModelCsv {
181180
"org.apache.hc.core5.http.io.entity;HttpEntities;true;withTrailers;;;Argument[0];ReturnValue;taint",
182181
"org.apache.http.entity;BasicHttpEntity;true;setContent;(InputStream);;Argument[0];Argument[-1];taint",
183182
"org.apache.http.entity;BufferedHttpEntity;true;BufferedHttpEntity;(HttpEntity);;Argument[0];ReturnValue;taint",
184-
"org.apache.http.entity;ByteArrayEntity;true;ByteArrayEntity;;;Argument[0];ReturnValue;taint",
183+
"org.apache.http.entity;ByteArrayEntity;true;ByteArrayEntity;;;Argument[0];Argument[-1];taint",
185184
"org.apache.http.entity;HttpEntityWrapper;true;HttpEntityWrapper;(HttpEntity);;Argument[0];ReturnValue;taint",
186185
"org.apache.http.entity;InputStreamEntity;true;InputStreamEntity;;;Argument[0];ReturnValue;taint",
187-
"org.apache.http.entity;StringEntity;true;StringEntity;;;Argument[0];ReturnValue;taint",
186+
"org.apache.http.entity;StringEntity;true;StringEntity;;;Argument[0];Argument[-1];taint",
188187
"org.apache.hc.core5.http.io.entity;BasicHttpEntity;true;BasicHttpEntity;;;Argument[0];ReturnValue;taint",
189188
"org.apache.hc.core5.http.io.entity;BufferedHttpEntity;true;BufferedHttpEntity;(HttpEntity);;Argument[0];ReturnValue;taint",
190-
"org.apache.hc.core5.http.io.entity;ByteArrayEntity;true;ByteArrayEntity;;;Argument[0];ReturnValue;taint",
189+
"org.apache.hc.core5.http.io.entity;ByteArrayEntity;true;ByteArrayEntity;;;Argument[0];Argument[-1];taint",
191190
"org.apache.hc.core5.http.io.entity;HttpEntityWrapper;true;HttpEntityWrapper;(HttpEntity);;Argument[0];ReturnValue;taint",
192191
"org.apache.hc.core5.http.io.entity;InputStreamEntity;true;InputStreamEntity;;;Argument[0];ReturnValue;taint",
193-
"org.apache.hc.core5.http.io.entity;StringEntity;true;StringEntity;;;Argument[0];ReturnValue;taint",
192+
"org.apache.hc.core5.http.io.entity;StringEntity;true;StringEntity;;;Argument[0];Argument[-1];taint",
194193
"org.apache.http.util;ByteArrayBuffer;true;append;(byte[],int,int);;Argument[0];Argument[-1];taint",
195194
"org.apache.http.util;ByteArrayBuffer;true;append;(char[],int,int);;Argument[0];Argument[-1];taint",
196195
"org.apache.http.util;ByteArrayBuffer;true;append;(CharArrayBuffer,int,int);;Argument[0];Argument[-1];taint",

java/ql/src/semmle/code/java/frameworks/apache/Lang.qll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -626,12 +626,12 @@ private class ApacheObjectUtilsModel extends SummaryModelCsv {
626626
"org.apache.commons.lang3;ObjectUtils;false;CONST_BYTE;;;Argument[0];ReturnValue;value",
627627
"org.apache.commons.lang3;ObjectUtils;false;CONST_SHORT;;;Argument[0];ReturnValue;value",
628628
"org.apache.commons.lang3;ObjectUtils;false;defaultIfNull;;;Argument[0..1];ReturnValue;value",
629-
"org.apache.commons.lang3;ObjectUtils;false;firstNonNull;;;Argument[0];ReturnValue;taint",
629+
"org.apache.commons.lang3;ObjectUtils;false;firstNonNull;;;ArrayElement of Argument[0];ReturnValue;value",
630630
"org.apache.commons.lang3;ObjectUtils;false;getIfNull;;;Argument[0];ReturnValue;value",
631-
"org.apache.commons.lang3;ObjectUtils;false;max;;;Argument[0];ReturnValue;taint",
632-
"org.apache.commons.lang3;ObjectUtils;false;median;;;Argument[0];ReturnValue;taint",
633-
"org.apache.commons.lang3;ObjectUtils;false;min;;;Argument[0];ReturnValue;taint",
634-
"org.apache.commons.lang3;ObjectUtils;false;mode;;;Argument[0];ReturnValue;taint",
631+
"org.apache.commons.lang3;ObjectUtils;false;max;;;ArrayElement of Argument[0];ReturnValue;value",
632+
"org.apache.commons.lang3;ObjectUtils;false;median;;;ArrayElement of Argument[0];ReturnValue;value",
633+
"org.apache.commons.lang3;ObjectUtils;false;min;;;ArrayElement of Argument[0];ReturnValue;value",
634+
"org.apache.commons.lang3;ObjectUtils;false;mode;;;ArrayElement of Argument[0];ReturnValue;value",
635635
"org.apache.commons.lang3;ObjectUtils;false;requireNonEmpty;;;Argument[0];ReturnValue;value",
636636
"org.apache.commons.lang3;ObjectUtils;false;toString;(Object,String);;Argument[1];ReturnValue;value"
637637
]

java/ql/src/semmle/code/java/frameworks/guava/Base.qll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ private class GuavaBaseCsv extends SummaryModelCsv {
1313
"com.google.common.base;Strings;false;padStart;(String,int,char);;Argument[0];ReturnValue;taint",
1414
"com.google.common.base;Strings;false;padEnd;(String,int,char);;Argument[0];ReturnValue;taint",
1515
"com.google.common.base;Strings;false;repeat;(String,int);;Argument[0];ReturnValue;taint",
16-
"com.google.common.base;Strings;false;lenientFormat;(String,Object[]);;Argument;ReturnValue;taint",
16+
"com.google.common.base;Strings;false;lenientFormat;(String,Object[]);;Argument[0];ReturnValue;taint",
17+
"com.google.common.base;Strings;false;lenientFormat;(String,Object[]);;ArrayElement of Argument[1];ReturnValue;taint",
1718
"com.google.common.base;Joiner;false;on;(String);;Argument[0];ReturnValue;taint",
1819
"com.google.common.base;Joiner;false;skipNulls;();;Argument[-1];ReturnValue;taint",
1920
"com.google.common.base;Joiner;false;useForNull;(String);;Argument[-1];ReturnValue;taint",
@@ -22,14 +23,14 @@ private class GuavaBaseCsv extends SummaryModelCsv {
2223
"com.google.common.base;Joiner;false;withKeyValueSeparator;(String);;Argument[-1];ReturnValue;taint",
2324
"com.google.common.base;Joiner;false;withKeyValueSeparator;(char);;Argument[-1];ReturnValue;taint",
2425
// Note: The signatures of some of the appendTo methods involve collection flow
25-
"com.google.common.base;Joiner;false;appendTo;;;Argument;Argument[0];taint",
26+
"com.google.common.base;Joiner;false;appendTo;;;Argument[-1..3];Argument[0];taint",
2627
"com.google.common.base;Joiner;false;appendTo;;;Argument[0];ReturnValue;value",
27-
"com.google.common.base;Joiner;false;join;;;Argument;ReturnValue;taint",
28+
"com.google.common.base;Joiner;false;join;;;Argument[-1..2];ReturnValue;taint",
2829
"com.google.common.base;Joiner$MapJoiner;false;useForNull;(String);;Argument[0];ReturnValue;taint",
2930
"com.google.common.base;Joiner$MapJoiner;false;useForNull;(String);;Argument[-1];ReturnValue;taint",
30-
"com.google.common.base;Joiner$MapJoiner;false;appendTo;;;Argument;Argument[0];taint",
31+
"com.google.common.base;Joiner$MapJoiner;false;appendTo;;;Argument[1];Argument[0];taint",
3132
"com.google.common.base;Joiner$MapJoiner;false;appendTo;;;Argument[0];ReturnValue;value",
32-
"com.google.common.base;Joiner$MapJoiner;false;join;;;Argument;ReturnValue;taint",
33+
"com.google.common.base;Joiner$MapJoiner;false;join;;;Argument[-1..0];ReturnValue;taint",
3334
"com.google.common.base;Splitter;false;split;(CharSequence);;Argument[0];ReturnValue;taint",
3435
"com.google.common.base;Splitter;false;splitToList;(CharSequence);;Argument[0];ReturnValue;taint",
3536
"com.google.common.base;Splitter;false;splitToStream;(CharSequence);;Argument[0];ReturnValue;taint",

java/ql/src/semmle/code/java/frameworks/guava/IO.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ private class GuavaIoCsv extends SummaryModelCsv {
6161
"com.google.common.io;Files;false;simplifyPath;(String);;Argument[0];ReturnValue;taint",
6262
"com.google.common.io;MoreFiles;false;getFileExtension;(Path);;Argument[0];ReturnValue;taint",
6363
"com.google.common.io;MoreFiles;false;getNameWithoutExtension;(Path);;Argument[0];ReturnValue;taint",
64-
"com.google.common.io;LineReader;false;LineReader;(Readable);;Argument[0];ReturnValue;taint",
64+
"com.google.common.io;LineReader;false;LineReader;(Readable);;Argument[0];Argument[-1];taint",
6565
"com.google.common.io;LineReader;true;readLine;();;Argument[-1];ReturnValue;taint",
6666
"com.google.common.io;ByteArrayDataOutput;true;toByteArray;();;Argument[-1];ReturnValue;taint",
6767
"com.google.common.io;ByteArrayDataOutput;true;write;(byte[]);;Argument[0];Argument[-1];taint",

0 commit comments

Comments
 (0)