Skip to content

Commit 5ce4483

Browse files
authored
Merge pull request github#10795 from aschackmull/java/synth-callable
Java: Add support for synthetic callables with flow summaries and model Stream.collect
2 parents fb49bab + ad8f0fc commit 5ce4483

File tree

13 files changed

+273
-32
lines changed

13 files changed

+273
-32
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added support for common patterns involving `Stream.collect` and common collectors like `Collectors.toList()`.

java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ import java
7575
private import semmle.code.java.dataflow.DataFlow::DataFlow
7676
private import internal.DataFlowPrivate
7777
private import internal.FlowSummaryImpl::Private::External
78-
private import internal.FlowSummaryImplSpecific
78+
private import internal.FlowSummaryImplSpecific as FlowSummaryImplSpecific
7979
private import internal.AccessPathSyntax
8080
private import FlowSummary
8181

@@ -834,7 +834,7 @@ private module Cached {
834834
*/
835835
cached
836836
predicate sourceNode(Node node, string kind) {
837-
exists(InterpretNode n | isSourceNode(n, kind) and n.asNode() = node)
837+
exists(FlowSummaryImplSpecific::InterpretNode n | isSourceNode(n, kind) and n.asNode() = node)
838838
}
839839

840840
/**
@@ -843,7 +843,7 @@ private module Cached {
843843
*/
844844
cached
845845
predicate sinkNode(Node node, string kind) {
846-
exists(InterpretNode n | isSinkNode(n, kind) and n.asNode() = node)
846+
exists(FlowSummaryImplSpecific::InterpretNode n | isSinkNode(n, kind) and n.asNode() = node)
847847
}
848848
}
849849

java/ql/lib/semmle/code/java/dataflow/FlowSummary.qll

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import java
66
private import internal.FlowSummaryImpl as Impl
7-
private import internal.DataFlowDispatch
87
private import internal.DataFlowUtil
98

109
// import all instances of SummarizedCallable below
@@ -24,6 +23,12 @@ module SummaryComponent {
2423
/** Gets a summary component for field `f`. */
2524
SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) }
2625

26+
/** Gets a summary component for `Element`, represented by any `CollectionContent`. */
27+
SummaryComponent element() { result = content(any(CollectionContent c)) }
28+
29+
/** Gets a summary component for `MapValue`, represented by any `MapValueContent`. */
30+
SummaryComponent mapValue() { result = content(any(MapValueContent c)) }
31+
2732
/** Gets a summary component that represents the return value of a call. */
2833
SummaryComponent return() { result = return(_) }
2934
}
@@ -42,10 +47,129 @@ module SummaryComponentStack {
4247
result = push(SummaryComponent::field(f), object)
4348
}
4449

50+
/**
 * Gets a stack representing `Element` of `object`, formed by pushing the
 * `Element` component onto the stack `object`.
 */
51+
SummaryComponentStack elementOf(SummaryComponentStack object) {
52+
result = push(SummaryComponent::element(), object)
53+
}
54+
55+
/**
 * Gets a stack representing `MapValue` of `object`, formed by pushing the
 * `MapValue` component onto the stack `object`.
 */
56+
SummaryComponentStack mapValueOf(SummaryComponentStack object) {
57+
result = push(SummaryComponent::mapValue(), object)
58+
}
59+
4560
/** Gets a singleton stack representing a (normal) return. */
4661
SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
4762
}
4863

64+
/**
 * A synthetic callable with a set of concrete call sites and a flow summary.
 *
 * Instances are strings. The characteristic predicate below accepts any bound
 * string, so each concrete subclass chooses its own identifying string,
 * supplies the call sites via `getACall`, and may override `propagatesFlow`,
 * `getParameterType`, and `getReturnType`.
 */
65+
abstract class SyntheticCallable extends string {
66+
bindingset[this]
67+
SyntheticCallable() { any() }
68+
69+
/** Gets a call that targets this callable. */
70+
abstract Call getACall();
71+
72+
/**
73+
* Holds if data may flow from `input` to `output` through this callable.
74+
*
75+
* See `SummarizedCallable::propagatesFlow` for details.
*
* The default implementation holds for nothing; subclasses override it to
* contribute flow.
76+
*/
77+
predicate propagatesFlow(
78+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
79+
) {
80+
none()
81+
}
82+
83+
/**
84+
* Gets the type of the parameter at the specified position with -1 indicating
85+
* the instance parameter. If no types are provided then the types default to
86+
* `Object`.
*
* The default implementation provides no type; the `Object` fallback is
* applied by `SummarizedCallableBase::getParameterType`, not here.
87+
*/
88+
Type getParameterType(int pos) { none() }
89+
90+
/**
91+
* Gets the return type of this callable. If no type is provided then the type
92+
* defaults to `Object`.
*
* The default implementation provides no type; the `Object` fallback is
* applied by `SummarizedCallableBase::getReturnType`, not here.
93+
*/
94+
Type getReturnType() { none() }
95+
}
96+
97+
// The two kinds of callable that may carry a flow summary: a regular
// `Callable`, restricted to source declarations, or a `SyntheticCallable`.
private newtype TSummarizedCallableBase =
98+
TSimpleCallable(Callable c) { c.isSourceDeclaration() } or
99+
TSyntheticCallable(SyntheticCallable c)
100+
101+
/**
102+
* A callable that may have a flow summary. This is either a regular `Callable`
103+
* or a `SyntheticCallable`.
*
* Use `asCallable` or `asSyntheticCallable` to recover the underlying value;
* exactly one of the two has a result for any given instance.
104+
*/
105+
class SummarizedCallableBase extends TSummarizedCallableBase {
106+
/**
 * Gets a textual representation of this callable. For a synthetic callable
 * this is its identifying string.
 */
107+
string toString() { result = this.asCallable().toString() or result = this.asSyntheticCallable() }
108+
109+
/**
 * Gets the source location for this callable. A synthetic callable gets the
 * location with an empty file path and all-zero line/column information.
 */
110+
Location getLocation() {
111+
result = this.asCallable().getLocation()
112+
or
113+
result.hasLocationInfo("", 0, 0, 0, 0) and
114+
this instanceof TSyntheticCallable
115+
}
116+
117+
/** Gets this callable cast as a `Callable`. */
118+
Callable asCallable() { this = TSimpleCallable(result) }
119+
120+
/** Gets this callable cast as a `SyntheticCallable`. */
121+
SyntheticCallable asSyntheticCallable() { this = TSyntheticCallable(result) }
122+
123+
/**
 * Gets a call that targets this callable. For a regular callable this is a
 * call whose callee has this callable as its source declaration; for a
 * synthetic callable it is whatever `SyntheticCallable::getACall` reports.
 */
124+
Call getACall() {
125+
result.getCallee().getSourceDeclaration() = this.asCallable()
126+
or
127+
result = this.asSyntheticCallable().getACall()
128+
}
129+
130+
/**
131+
* Gets the type of the parameter at the specified position with -1 indicating
132+
* the instance parameter.
*
* For a regular callable, position -1 yields the declaring type. For a
* synthetic callable, an explicitly provided type is used if there is one;
* otherwise the type is `Object` for positions accepted by
* `summaryParameterNodeRange`.
133+
*/
134+
Type getParameterType(int pos) {
135+
result = this.asCallable().getParameterType(pos)
136+
or
137+
pos = -1 and result = this.asCallable().getDeclaringType()
138+
or
139+
result = this.asSyntheticCallable().getParameterType(pos)
140+
or
141+
exists(SyntheticCallable sc | sc = this.asSyntheticCallable() |
142+
// NOTE(review): presumably limits `pos` to the positions for which summary
// parameter nodes exist - confirm against `FlowSummaryImpl`.
Impl::Private::summaryParameterNodeRange(this, pos) and
143+
not exists(sc.getParameterType(pos)) and
144+
result instanceof TypeObject
145+
)
146+
}
147+
148+
/**
 * Gets the return type of this callable. A synthetic callable that does not
 * provide a return type gets `Object`.
 */
149+
Type getReturnType() {
150+
result = this.asCallable().getReturnType()
151+
or
152+
exists(SyntheticCallable sc | sc = this.asSyntheticCallable() |
153+
result = sc.getReturnType()
154+
or
155+
not exists(sc.getReturnType()) and
156+
result instanceof TypeObject
157+
)
158+
}
159+
}
160+
49161
class SummarizedCallable = Impl::Public::SummarizedCallable;
50162

163+
/**
164+
* An adapter class to add the flow summaries specified on `SyntheticCallable`
165+
* to `SummarizedCallable`.
*
* Every `TSyntheticCallable` becomes a `SummarizedCallable` whose
* `propagatesFlow` simply forwards to the wrapped `SyntheticCallable`.
166+
*/
167+
private class SummarizedSyntheticCallableAdapter extends SummarizedCallable, TSyntheticCallable {
168+
override predicate propagatesFlow(
169+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
170+
) {
171+
this.asSyntheticCallable().propagatesFlow(input, output, preservesValue)
172+
}
173+
}
174+
51175
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowDispatch.qll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@ private import semmle.code.java.dispatch.internal.Unification
99

1010
private module DispatchImpl {
1111
private predicate hasHighConfidenceTarget(Call c) {
12-
exists(SummarizedCallable sc |
13-
sc = c.getCallee().getSourceDeclaration() and not sc.isAutoGenerated()
14-
)
12+
exists(SummarizedCallable sc | sc.getACall() = c and not sc.isAutoGenerated())
1513
or
1614
exists(Callable srcTgt |
1715
srcTgt = VirtualDispatch::viableCallable(c) and
@@ -30,7 +28,7 @@ private module DispatchImpl {
3028
DataFlowCallable viableCallable(DataFlowCall c) {
3129
result.asCallable() = sourceDispatch(c.asCall())
3230
or
33-
result.asSummarizedCallable() = c.asCall().getCallee().getSourceDeclaration()
31+
result.asSummarizedCallable().getACall() = c.asCall()
3432
}
3533

3634
/**
@@ -144,7 +142,7 @@ private module DispatchImpl {
144142
not Unification::failsUnification(t, t2)
145143
)
146144
or
147-
result.asSummarizedCallable() = def
145+
result.asSummarizedCallable().getACall() = ma
148146
)
149147
}
150148

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowNodes.qll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -463,11 +463,7 @@ module Private {
463463
c.asSummarizedCallable() = sc and pos = pos_
464464
}
465465

466-
Type getTypeImpl() {
467-
result = sc.getParameter(pos_).getType()
468-
or
469-
pos_ = -1 and result = sc.getDeclaringType()
470-
}
466+
Type getTypeImpl() { result = sc.getParameterType(pos_) }
471467
}
472468
}
473469

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,6 @@ class DataFlowCallable extends TDataFlowCallable {
241241

242242
Field asFieldScope() { this = TFieldScope(result) }
243243

244-
RefType getDeclaringType() {
245-
result = this.asCallable().getDeclaringType() or
246-
result = this.asSummarizedCallable().getDeclaringType() or
247-
result = this.asFieldScope().getDeclaringType()
248-
}
249-
250244
string toString() {
251245
result = this.asCallable().toString() or
252246
result = "Synthetic: " + this.asSummarizedCallable().toString() or

java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,9 @@ private import DataFlowUtil
99
private import FlowSummaryImpl::Private
1010
private import FlowSummaryImpl::Public
1111
private import semmle.code.java.dataflow.ExternalFlow
12+
private import semmle.code.java.dataflow.FlowSummary as FlowSummary
1213

13-
private module FlowSummaries {
14-
private import semmle.code.java.dataflow.FlowSummary as F
15-
}
16-
17-
class SummarizedCallableBase = Callable;
14+
class SummarizedCallableBase = FlowSummary::SummarizedCallableBase;
1815

1916
DataFlowCallable inject(SummarizedCallable c) { result.asSummarizedCallable() = c }
2017

@@ -67,26 +64,28 @@ private boolean isGenerated(string provenance) {
6764
* `input`, output specification `output`, kind `kind`, and a flag `generated`
6865
* stating whether the summary is autogenerated.
6966
*/
70-
predicate summaryElement(Callable c, string input, string output, string kind, boolean generated) {
67+
predicate summaryElement(
68+
SummarizedCallableBase c, string input, string output, string kind, boolean generated
69+
) {
7170
exists(
7271
string namespace, string type, boolean subtypes, string name, string signature, string ext,
7372
string provenance
7473
|
7574
summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) and
7675
generated = isGenerated(provenance) and
77-
c = interpretElement(namespace, type, subtypes, name, signature, ext)
76+
c.asCallable() = interpretElement(namespace, type, subtypes, name, signature, ext)
7877
)
7978
}
8079

8180
/**
8281
* Holds if a negative flow summary exists for `c`, which means that there is no
8382
* flow through `c`. The flag `generated` states whether the summary is autogenerated.
8483
*/
85-
predicate negativeSummaryElement(Callable c, boolean generated) {
84+
predicate negativeSummaryElement(SummarizedCallableBase c, boolean generated) {
8685
exists(string namespace, string type, string name, string signature, string provenance |
8786
negativeSummaryModel(namespace, type, name, signature, provenance) and
8887
generated = isGenerated(provenance) and
89-
c = interpretElement(namespace, type, false, name, signature, "")
88+
c.asCallable() = interpretElement(namespace, type, false, name, signature, "")
9089
)
9190
}
9291

java/ql/lib/semmle/code/java/frameworks/Stream.qll

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,101 @@
11
/** Definitions related to `java.util.stream`. */
22

33
private import semmle.code.java.dataflow.ExternalFlow
4+
private import semmle.code.java.dataflow.FlowSummary
5+
6+
/**
 * A call to a method whose source declaration is
 * `java.util.stream.Stream.collect`. All overloads named `collect` match;
 * the argument count is not restricted here.
 */
private class CollectCall extends MethodAccess {
7+
CollectCall() {
8+
this.getMethod()
9+
.getSourceDeclaration()
10+
.hasQualifiedName("java.util.stream", "Stream", "collect")
11+
}
12+
}
13+
14+
/**
 * A call to a method declared in `java.util.stream.Collectors`, such as
 * `Collectors.toList()`.
 */
private class Collector extends MethodAccess {
15+
Collector() {
16+
this.getMethod().getDeclaringType().hasQualifiedName("java.util.stream", "Collectors")
17+
}
18+
19+
/** Holds if the called `Collectors` method is named `name`. */
predicate hasName(string name) { this.getMethod().hasName(name) }
20+
}
21+
22+
/**
 * A synthetic callable for `Stream.collect` calls whose first argument is a
 * call to one of the listed `Collectors` factory methods. Elements of the
 * stream (the qualifier) flow to elements of the returned container.
 */
private class CollectToContainer extends SyntheticCallable {
23+
CollectToContainer() { this = "java.util.stream.collect()+Collectors.[toList,...]" }
24+
25+
// Matches `collect` calls whose first argument is directly a call to one of
// these collectors.
// NOTE(review): `maxBy`/`minBy` produce an `Optional` rather than a
// collection; the same `Element`-to-`Element` summary is applied to them.
override Call getACall() {
26+
result
27+
.(CollectCall)
28+
.getArgument(0)
29+
.(Collector)
30+
.hasName([
31+
"maxBy", "minBy", "toCollection", "toList", "toSet", "toUnmodifiableList",
32+
"toUnmodifiableSet"
33+
])
34+
}
35+
36+
// Value-preserving flow from `Element` of the qualifier to `Element` of the
// return value.
override predicate propagatesFlow(
37+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
38+
) {
39+
input = SummaryComponentStack::elementOf(SummaryComponentStack::qualifier()) and
40+
output = SummaryComponentStack::elementOf(SummaryComponentStack::return()) and
41+
preservesValue = true
42+
}
43+
}
44+
45+
/**
 * A synthetic callable for `Stream.collect` calls whose first argument is a
 * call to a `Collectors` method named `joining`. Elements of the stream (the
 * qualifier) flow to the returned string.
 */
private class CollectToJoining extends SyntheticCallable {
46+
CollectToJoining() { this = "java.util.stream.collect()+Collectors.joining" }
47+
48+
// Matches by name only, so every overload of `joining` is covered.
override Call getACall() { result.(CollectCall).getArgument(0).(Collector).hasName("joining") }
49+
50+
// Flow from `Element` of the qualifier to the return value itself.
// `preservesValue` is false: presumably because the result is a string built
// from the elements rather than any single element - see
// `SummarizedCallable::propagatesFlow` for the exact meaning.
override predicate propagatesFlow(
51+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
52+
) {
53+
input = SummaryComponentStack::elementOf(SummaryComponentStack::qualifier()) and
54+
output = SummaryComponentStack::return() and
55+
preservesValue = false
56+
}
57+
58+
// The return type is declared as `String` instead of the `Object` default.
override Type getReturnType() { result instanceof TypeString }
59+
}
60+
61+
/**
 * A synthetic callable for `Stream.collect` calls whose first argument is a
 * call to a one-parameter `Collectors.groupingBy`, `groupingByConcurrent`, or
 * `partitioningBy`. Elements of the stream (the qualifier) flow to elements
 * of the map values of the returned map.
 */
private class CollectToGroupingBy extends SyntheticCallable {
62+
CollectToGroupingBy() {
63+
this = "java.util.stream.collect()+Collectors.[groupingBy(Function),...]"
64+
}
65+
66+
// Only the single-parameter overloads are matched; overloads that take a
// downstream collector or a map factory are not covered by this summary.
override Call getACall() {
67+
exists(Method m |
68+
m = result.(CollectCall).getArgument(0).(Collector).getMethod() and
69+
m.hasName(["groupingBy", "groupingByConcurrent", "partitioningBy"]) and
70+
m.getNumberOfParameters() = 1
71+
)
72+
}
73+
74+
// Value-preserving flow from `Element` of the qualifier to `Element` of
// `MapValue` of the return value.
override predicate propagatesFlow(
75+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
76+
) {
77+
input = SummaryComponentStack::elementOf(SummaryComponentStack::qualifier()) and
78+
output =
79+
SummaryComponentStack::elementOf(SummaryComponentStack::mapValueOf(SummaryComponentStack::return())) and
80+
preservesValue = true
81+
}
82+
}
83+
84+
/**
 * Declares the `(head, tail)` component-stack pairs that appear in the
 * `Stream.collect` summaries defined in this file: `Element` of the
 * qualifier, `Element` of the return value, `Element` of `MapValue` of the
 * return value, and `MapValue` of the return value.
 *
 * NOTE(review): presumably these must be registered so that the stacks exist
 * for the summary implementation - confirm against
 * `RequiredSummaryComponentStack`.
 */
private class RequiredComponentStackForCollect extends RequiredSummaryComponentStack {
85+
override predicate required(SummaryComponent head, SummaryComponentStack tail) {
86+
head = SummaryComponent::element() and
87+
tail = SummaryComponentStack::qualifier()
88+
or
89+
head = SummaryComponent::element() and
90+
tail = SummaryComponentStack::return()
91+
or
92+
head = SummaryComponent::element() and
93+
tail = SummaryComponentStack::mapValueOf(SummaryComponentStack::return())
94+
or
95+
head = SummaryComponent::mapValue() and
96+
tail = SummaryComponentStack::return()
97+
}
98+
}
499

5100
private class StreamModel extends SummaryModelCsv {
6101
override predicate row(string s) {
@@ -19,7 +114,7 @@ private class StreamModel extends SummaryModelCsv {
19114
"java.util.stream;Stream;true;collect;(Supplier,BiConsumer,BiConsumer);;Argument[1].Parameter[0];Argument[2].Parameter[0..1];value;manual",
20115
"java.util.stream;Stream;true;collect;(Supplier,BiConsumer,BiConsumer);;Argument[2].Parameter[0..1];Argument[1].Parameter[0];value;manual",
21116
"java.util.stream;Stream;true;collect;(Supplier,BiConsumer,BiConsumer);;Argument[-1].Element;Argument[1].Parameter[1];value;manual",
22-
// Missing: collect(Collector<T,A,R> collector)
117+
// collect(Collector<T,A,R> collector) is handled separately on a case-by-case basis as it is too complex for MaD
23118
"java.util.stream;Stream;true;concat;(Stream,Stream);;Argument[0..1].Element;ReturnValue.Element;value;manual",
24119
"java.util.stream;Stream;true;distinct;();;Argument[-1].Element;ReturnValue.Element;value;manual",
25120
"java.util.stream;Stream;true;dropWhile;(Predicate);;Argument[-1].Element;Argument[0].Parameter[0];value;manual",

0 commit comments

Comments
 (0)