Skip to content

Commit bd9e3d0

Browse files
authored
Merge pull request github#5751 from aschackmull/java/collection-flow
Java: Convert all collection and array steps from taint flow to value flow.
2 parents ffad65b + e86c534 commit bd9e3d0

33 files changed

+2547
-220
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
lgtm,codescanning
2+
* Data flow now tracks steps through collections and arrays more precisely.
3+
That means that collection and array read steps are now matched up with
4+
preceding store steps. This results in increased precision for all flow-based
5+
queries, in particular most of the security queries.

java/ql/src/semmle/code/java/dataflow/ExternalFlow.qll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ private import FlowSummary
7575
* ensuring that they are visible to the taint tracking / data flow library.
7676
*/
7777
private module Frameworks {
78+
private import internal.ContainerFlow
7879
private import semmle.code.java.frameworks.ApacheHttp
7980
private import semmle.code.java.frameworks.apache.Lang
8081
private import semmle.code.java.frameworks.guava.Guava
@@ -482,7 +483,7 @@ module CsvValidation {
482483
not namespace.regexpMatch("[a-zA-Z0-9_\\.]+") and
483484
msg = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
484485
or
485-
not type.regexpMatch("[a-zA-Z0-9_\\$]+") and
486+
not type.regexpMatch("[a-zA-Z0-9_\\$<>]+") and
486487
msg = "Dubious type \"" + type + "\" in " + pred + " model."
487488
or
488489
not name.regexpMatch("[a-zA-Z0-9_]*") and
@@ -566,7 +567,7 @@ private RefType interpretType(string namespace, string type, boolean subtypes) {
566567
private string paramsStringPart(Callable c, int i) {
567568
i = -1 and result = "("
568569
or
569-
exists(int n, string p | c.getParameterType(n).toString() = p |
570+
exists(int n, string p | c.getParameterType(n).getErasure().toString() = p |
570571
i = 2 * n and result = p
571572
or
572573
i = 2 * n - 1 and result = "," and n != 0

java/ql/src/semmle/code/java/dataflow/internal/ContainerFlow.qll

Lines changed: 338 additions & 0 deletions
Large diffs are not rendered by default.

java/ql/src/semmle/code/java/dataflow/internal/DataFlowPrivate.qll

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ private import DataFlowImplCommon
44
private import DataFlowDispatch
55
private import semmle.code.java.controlflow.Guards
66
private import semmle.code.java.dataflow.SSA
7+
private import ContainerFlow
78
private import FlowSummaryImpl as FlowSummaryImpl
89
import DataFlowNodes::Private
910

@@ -137,13 +138,15 @@ class MapValueContent extends Content, TMapValueContent {
137138
* Thus, `node2` references an object with a field `f` that contains the
138139
* value of `node1`.
139140
*/
140-
predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
141+
predicate storeStep(Node node1, Content f, Node node2) {
141142
exists(FieldAccess fa |
142143
instanceFieldAssign(node1.asExpr(), fa) and
143-
node2.getPreUpdateNode() = getFieldQualifier(fa) and
144+
node2.(PostUpdateNode).getPreUpdateNode() = getFieldQualifier(fa) and
144145
f.(FieldContent).getField() = fa.getField()
145146
)
146147
or
148+
f instanceof ArrayContent and arrayStoreStep(node1, node2)
149+
or
147150
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, f, node2)
148151
}
149152

@@ -171,6 +174,10 @@ predicate readStep(Node node1, Content f, Node node2) {
171174
node2.asExpr() = get
172175
)
173176
or
177+
f instanceof ArrayContent and arrayReadStep(node1, node2, _)
178+
or
179+
f instanceof CollectionContent and collectionReadStep(node1, node2)
180+
or
174181
FlowSummaryImpl::Private::Steps::summaryReadStep(node1, f, node2)
175182
}
176183

java/ql/src/semmle/code/java/dataflow/internal/DataFlowUtil.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ predicate simpleLocalFlowStep(Node node1, Node node2) {
144144
or
145145
node2.asExpr().(AssignExpr).getSource() = node1.asExpr()
146146
or
147+
node2.asExpr().(ArrayCreationExpr).getInit() = node1.asExpr()
148+
or
147149
exists(MethodAccess ma, ValuePreservingMethod m, int argNo |
148150
ma.getCallee().getSourceDeclaration() = m and m.returnsValue(argNo)
149151
|

java/ql/src/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ DataFlowType getContentType(Content c) {
3131
or
3232
c instanceof ArrayContent and
3333
result instanceof TypeObject
34+
or
35+
c instanceof MapKeyContent and
36+
result instanceof TypeObject
37+
or
38+
c instanceof MapValueContent and
39+
result instanceof TypeObject
3440
}
3541

3642
/** Gets the return type of kind `rk` for callable `c`. */

java/ql/src/semmle/code/java/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 95 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,15 @@ private module Cached {
6060
localAdditionalTaintUpdateStep(src.asExpr(),
6161
sink.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr())
6262
or
63-
exists(Argument arg |
64-
src.asExpr() = arg and
65-
arg.isVararg() and
66-
sink.(DataFlow::ImplicitVarargsArray).getCall() = arg.getCall()
63+
exists(Content f |
64+
readStep(src, f, sink) and
65+
not sink.getTypeBound() instanceof PrimitiveType and
66+
not sink.getTypeBound() instanceof BoxedType and
67+
not sink.getTypeBound() instanceof NumberType
68+
|
69+
f instanceof ArrayContent or
70+
f instanceof CollectionContent or
71+
f instanceof MapValueContent
6772
)
6873
or
6974
FlowSummaryImpl::Private::Steps::summaryLocalStep(src, sink, false)
@@ -93,6 +98,92 @@ private module Cached {
9398

9499
import Cached
95100

101+
/**
102+
* These configurations add a number of configuration-dependent additional taint
103+
* steps to all taint configurations. For each sink or additional step provided
104+
* by a given configuration the types are inspected to find those implicit
105+
* collection or array read steps that might be required at the sink or step
106+
* input. The corresponding store steps are then added as additional taint steps
107+
* to provide backwards-compatible taint flow to such sinks and steps.
108+
*
109+
* This is a temporary measure until support is added for such sinks that
110+
* require implicit read steps.
111+
*/
112+
private module StoreTaintSteps {
113+
private import semmle.code.java.dataflow.TaintTracking
114+
private import semmle.code.java.dataflow.TaintTracking2
115+
116+
private class StoreTaintConfig extends TaintTracking::Configuration {
117+
StoreTaintConfig() { this instanceof TaintTracking::Configuration or none() }
118+
119+
override predicate isSource(DataFlow::Node n) { none() }
120+
121+
override predicate isSink(DataFlow::Node n) { none() }
122+
123+
private predicate needsTaintStore(RefType container, Type elem, Content f) {
124+
exists(DataFlow::Node arg |
125+
(isSink(arg) or isAdditionalTaintStep(arg, _)) and
126+
(arg.asExpr() instanceof Argument or arg instanceof ArgumentNode) and
127+
arg.getType() = container
128+
or
129+
needsTaintStore(_, container, _)
130+
|
131+
container.(Array).getComponentType() = elem and
132+
f instanceof ArrayContent
133+
or
134+
container.(CollectionType).getElementType() = elem and
135+
f instanceof CollectionContent
136+
or
137+
container.(MapType).getValueType() = elem and
138+
f instanceof MapValueContent
139+
)
140+
}
141+
142+
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
143+
exists(Content f, Type elem |
144+
storeStep(node1, f, node2) and
145+
needsTaintStore(_, elem, f) and
146+
not exists(Type srctyp | srctyp = node1.getTypeBound() | not compatibleTypes(srctyp, elem))
147+
)
148+
}
149+
}
150+
151+
private class StoreTaintConfig2 extends TaintTracking2::Configuration {
152+
StoreTaintConfig2() { this instanceof TaintTracking2::Configuration or none() }
153+
154+
override predicate isSource(DataFlow::Node n) { none() }
155+
156+
override predicate isSink(DataFlow::Node n) { none() }
157+
158+
private predicate needsTaintStore(RefType container, Type elem, Content f) {
159+
exists(DataFlow::Node arg |
160+
(isSink(arg) or isAdditionalTaintStep(arg, _)) and
161+
(arg.asExpr() instanceof Argument or arg instanceof ArgumentNode) and
162+
arg.getType() = container
163+
or
164+
needsTaintStore(_, container, _)
165+
|
166+
container.(Array).getComponentType() = elem and
167+
f instanceof ArrayContent
168+
or
169+
container.(CollectionType).getElementType() = elem and
170+
f instanceof CollectionContent
171+
or
172+
container.(MapType).getValueType() = elem and
173+
f instanceof MapValueContent
174+
)
175+
}
176+
177+
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
178+
exists(Content f, Type elem |
179+
storeStep(node1, f, node2) and
180+
needsTaintStore(_, elem, f) and
181+
not exists(Type srctyp | srctyp = node1.getTypeBound() | not compatibleTypes(srctyp, elem))
182+
)
183+
}
184+
}
185+
}
186+
96187
/**
97188
* Holds if taint can flow in one local step from `src` to `sink` excluding
98189
* local data flow steps. That is, `src` and `sink` are likely to represent
@@ -103,22 +194,8 @@ private predicate localAdditionalTaintExprStep(Expr src, Expr sink) {
103194
or
104195
sink.(AssignAddExpr).getSource() = src and sink.getType() instanceof TypeString
105196
or
106-
sink.(ArrayCreationExpr).getInit() = src
107-
or
108-
sink.(ArrayInit).getAnInit() = src
109-
or
110-
sink.(ArrayAccess).getArray() = src
111-
or
112197
sink.(LogicExpr).getAnOperand() = src
113198
or
114-
exists(EnhancedForStmt for, SsaExplicitUpdate v |
115-
for.getExpr() = src and
116-
v.getDefiningExpr() = for.getVariable() and
117-
v.getAFirstUse() = sink
118-
)
119-
or
120-
containerReturnValueStep(src, sink)
121-
or
122199
constructorStep(src, sink)
123200
or
124201
qualifierToMethodStep(src, sink)
@@ -141,12 +218,6 @@ private predicate localAdditionalTaintExprStep(Expr src, Expr sink) {
141218
* This is restricted to cases where the step updates the value of `sink`.
142219
*/
143220
private predicate localAdditionalTaintUpdateStep(Expr src, Expr sink) {
144-
exists(Assignment assign | assign.getSource() = src |
145-
sink = assign.getDest().(ArrayAccess).getArray()
146-
)
147-
or
148-
containerUpdateStep(src, sink)
149-
or
150221
qualifierToArgumentStep(src, sink)
151222
or
152223
argToArgStep(src, sink)

java/ql/src/semmle/code/java/frameworks/ApacheHttp.qll

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -165,14 +165,17 @@ private class ApacheHttpFlowStep extends SummaryModelCsv {
165165
"org.apache.http.util;EncodingUtils;true;getAsciiString;;;Argument[0];ReturnValue;taint",
166166
"org.apache.http.util;EncodingUtils;true;getBytes;(String,String);;Argument[0];ReturnValue;taint",
167167
"org.apache.http.util;EncodingUtils;true;getString;;;Argument[0];ReturnValue;taint",
168-
"org.apache.http.util;Args;true;containsNoBlanks;(T,String);;Argument[0];ReturnValue;value",
169-
"org.apache.http.util;Args;true;notNull;(T,String);;Argument[0];ReturnValue;value",
170-
"org.apache.http.util;Args;true;notEmpty;(T,String);;Argument[0];ReturnValue;value",
171-
"org.apache.http.util;Args;true;notBlank;(T,String);;Argument[0];ReturnValue;value",
172-
"org.apache.hc.core5.util;Args;true;containsNoBlanks;(T,String);;Argument[0];ReturnValue;value",
173-
"org.apache.hc.core5.util;Args;true;notNull;(T,String);;Argument[0];ReturnValue;value",
174-
"org.apache.hc.core5.util;Args;true;notEmpty;(T,String);;Argument[0];ReturnValue;value",
175-
"org.apache.hc.core5.util;Args;true;notBlank;(T,String);;Argument[0];ReturnValue;value",
168+
"org.apache.http.util;Args;true;containsNoBlanks;(CharSequence,String);;Argument[0];ReturnValue;value",
169+
"org.apache.http.util;Args;true;notNull;(Object,String);;Argument[0];ReturnValue;value",
170+
"org.apache.http.util;Args;true;notEmpty;(CharSequence,String);;Argument[0];ReturnValue;value",
171+
"org.apache.http.util;Args;true;notEmpty;(Collection,String);;Argument[0];ReturnValue;value",
172+
"org.apache.http.util;Args;true;notBlank;(CharSequence,String);;Argument[0];ReturnValue;value",
173+
"org.apache.hc.core5.util;Args;true;containsNoBlanks;(CharSequence,String);;Argument[0];ReturnValue;value",
174+
"org.apache.hc.core5.util;Args;true;notNull;(Object,String);;Argument[0];ReturnValue;value",
175+
"org.apache.hc.core5.util;Args;true;notEmpty;(Collection,String);;Argument[0];ReturnValue;value",
176+
"org.apache.hc.core5.util;Args;true;notEmpty;(CharSequence,String);;Argument[0];ReturnValue;value",
177+
"org.apache.hc.core5.util;Args;true;notEmpty;(Object,String);;Argument[0];ReturnValue;value",
178+
"org.apache.hc.core5.util;Args;true;notBlank;(CharSequence,String);;Argument[0];ReturnValue;value",
176179
"org.apache.hc.core5.http.io.entity;HttpEntities;true;create;;;Argument[0];ReturnValue;taint",
177180
"org.apache.hc.core5.http.io.entity;HttpEntities;true;createGzipped;;;Argument[0];ReturnValue;taint",
178181
"org.apache.hc.core5.http.io.entity;HttpEntities;true;createUrlEncoded;;;Argument[0];ReturnValue;taint",

0 commit comments

Comments
 (0)