Skip to content

Commit b579e2e

Browse files
authored
Merge pull request github#11493 from aschackmull/java/scc-equivrel
Java: Replace ad-hoc SCC reduction with union-find.
2 parents be16890 + 0963b6f commit b579e2e

File tree

1 file changed

+56
-116
lines changed

1 file changed

+56
-116
lines changed

java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll

Lines changed: 56 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,6 @@ private class TypeFlowNode extends TTypeFlowNode {
5353
}
5454
}
5555

56-
private int getNodeKind(TypeFlowNode n) {
57-
result = 1 and n instanceof TField
58-
or
59-
result = 2 and n instanceof TSsa
60-
or
61-
result = 3 and n instanceof TExpr
62-
or
63-
result = 4 and n instanceof TMethod
64-
}
65-
6656
/** Gets `t` if it is a `RefType` or the boxed type if `t` is a primitive type. */
6757
private RefType boxIfNeeded(Type t) {
6858
t.(PrimitiveType).getBoxedType() = result or
@@ -158,107 +148,45 @@ private predicate joinStep(TypeFlowNode n1, TypeFlowNode n2) {
158148

159149
private predicate anyStep(TypeFlowNode n1, TypeFlowNode n2) { joinStep(n1, n2) or step(n1, n2) }
160150

161-
private import SccReduction
151+
private predicate sccEdge(TypeFlowNode n1, TypeFlowNode n2) { anyStep(n1, n2) and anyStep+(n2, n1) }
162152

163-
/**
164-
* SCC reduction.
165-
*
166-
* This ought to be as easy as `equivalenceRelation(sccEdge/2)(n, scc)`, but
167-
* this HOP is not currently supported for newtypes.
168-
*
169-
* A straightforward implementation would be:
170-
* ```ql
171-
* predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
172-
* scc =
173-
* max(TypeFlowNode n2 |
174-
* sccEdge+(n, n2)
175-
* |
176-
* n2
177-
* order by
178-
* n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
179-
* )
180-
* }
181-
*
182-
* ```
183-
* but this is quadratic in the size of the SCCs.
184-
*
185-
* Instead we find local maxima by following SCC edges and determine the SCC
186-
* representatives from those.
187-
* (This is still worst-case quadratic in the size of the SCCs, but generally
188-
* performs better.)
189-
*/
190-
private module SccReduction {
191-
private predicate sccEdge(TypeFlowNode n1, TypeFlowNode n2) {
192-
anyStep(n1, n2) and anyStep+(n2, n1)
193-
}
153+
private module Scc = QlBuiltins::EquivalenceRelation<TypeFlowNode, sccEdge/2>;
194154

195-
private predicate sccEdgeWithMax(TypeFlowNode n1, TypeFlowNode n2, TypeFlowNode m) {
196-
sccEdge(n1, n2) and
197-
m =
198-
max(TypeFlowNode n |
199-
n = [n1, n2]
200-
|
201-
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn(), getNodeKind(n)
202-
)
203-
}
155+
private class TypeFlowScc = Scc::EquivalenceClass;
204156

205-
private predicate hasLargerNeighbor(TypeFlowNode n) {
206-
exists(TypeFlowNode n2 |
207-
sccEdgeWithMax(n, n2, n2) and
208-
not sccEdgeWithMax(n, n2, n)
209-
or
210-
sccEdgeWithMax(n2, n, n2) and
211-
not sccEdgeWithMax(n2, n, n)
212-
)
213-
}
157+
/** Holds if `n` is part of an SCC of size 2 or more represented by `scc`. */
158+
private predicate sccRepr(TypeFlowNode n, TypeFlowScc scc) { scc = Scc::getEquivalenceClass(n) }
214159

215-
private predicate localMax(TypeFlowNode m) {
216-
sccEdgeWithMax(_, _, m) and
217-
not hasLargerNeighbor(m)
218-
}
160+
private predicate sccJoinStep(TypeFlowNode n, TypeFlowScc scc) {
161+
exists(TypeFlowNode mid |
162+
joinStep(n, mid) and
163+
sccRepr(mid, scc) and
164+
not sccRepr(n, scc)
165+
)
166+
}
219167

220-
private predicate sccReprFromLocalMax(TypeFlowNode scc) {
221-
exists(TypeFlowNode m |
222-
localMax(m) and
223-
scc =
224-
max(TypeFlowNode n2 |
225-
sccEdge+(m, n2) and localMax(n2)
226-
|
227-
n2
228-
order by
229-
n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
230-
)
231-
)
232-
}
168+
private signature class NodeSig;
233169

234-
/** Holds if `n` is part of an SCC of size 2 or more represented by `scc`. */
235-
predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
236-
sccEdge+(n, scc) and sccReprFromLocalMax(scc)
237-
}
170+
private signature module Edge {
171+
class Node;
238172

239-
predicate sccJoinStep(TypeFlowNode n, TypeFlowNode scc) {
240-
exists(TypeFlowNode mid |
241-
joinStep(n, mid) and
242-
sccRepr(mid, scc) and
243-
not sccRepr(n, scc)
244-
)
245-
}
173+
predicate edge(TypeFlowNode n1, Node n2);
246174
}
247175

248-
private signature predicate edgeSig(TypeFlowNode n1, TypeFlowNode n2);
176+
private signature module RankedEdge<NodeSig Node> {
177+
predicate edgeRank(int r, TypeFlowNode n1, Node n2);
249178

250-
private signature module RankedEdge {
251-
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2);
252-
253-
int lastRank(TypeFlowNode n);
179+
int lastRank(Node n);
254180
}
255181

256-
private module RankEdge<edgeSig/2 edge> implements RankedEdge {
182+
private module RankEdge<Edge E> implements RankedEdge<E::Node> {
183+
private import E
184+
257185
/**
258186
* Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. The used
259187
* ordering is not necessarily total, so the ranking may have gaps.
260188
*/
261-
private predicate edgeRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
189+
private predicate edgeRank1(int r, TypeFlowNode n1, Node n2) {
262190
n1 =
263191
rank[r](TypeFlowNode n |
264192
edge(n, n2)
@@ -271,19 +199,19 @@ private module RankEdge<edgeSig/2 edge> implements RankedEdge {
271199
* Holds if `r2` is a ranking of the ranks from `edgeRank1`. This removes the
272200
* gaps from the ranking.
273201
*/
274-
private predicate edgeRank2(int r2, int r1, TypeFlowNode n) {
202+
private predicate edgeRank2(int r2, int r1, Node n) {
275203
r1 = rank[r2](int r | edgeRank1(r, _, n) | r)
276204
}
277205

278206
/** Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. */
279-
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
207+
predicate edgeRank(int r, TypeFlowNode n1, Node n2) {
280208
exists(int r1 |
281209
edgeRank1(r1, n1, n2) and
282210
edgeRank2(r, r1, n2)
283211
)
284212
}
285213

286-
int lastRank(TypeFlowNode n) { result = max(int r | edgeRank(r, _, n)) }
214+
int lastRank(Node n) { result = max(int r | edgeRank(r, _, n)) }
287215
}
288216

289217
private signature module TypePropagation {
@@ -296,16 +224,16 @@ private signature module TypePropagation {
296224
}
297225

298226
/** Implements recursion through `forall` by way of edge ranking. */
299-
private module ForAll<RankedEdge Edge, TypePropagation T> {
227+
private module ForAll<NodeSig Node, RankedEdge<Node> E, TypePropagation T> {
300228
/**
301229
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
302230
* thus is a candidate bound for `n`.
303231
*/
304232
pragma[nomagic]
305-
private predicate candJoinType(TypeFlowNode n, T::Typ t) {
233+
private predicate candJoinType(Node n, T::Typ t) {
306234
exists(TypeFlowNode mid |
307235
T::candType(mid, t) and
308-
Edge::edgeRank(_, mid, n)
236+
E::edgeRank(_, mid, n)
309237
)
310238
}
311239

@@ -314,26 +242,38 @@ private module ForAll<RankedEdge Edge, TypePropagation T> {
314242
* through the edges into `n` ranked from `1` to `r`.
315243
*/
316244
pragma[assume_small_delta]
317-
private predicate flowJoin(int r, TypeFlowNode n, T::Typ t) {
245+
private predicate flowJoin(int r, Node n, T::Typ t) {
318246
(
319247
r = 1 and candJoinType(n, t)
320248
or
321-
flowJoin(r - 1, n, t) and Edge::edgeRank(r, _, n)
249+
flowJoin(r - 1, n, t) and E::edgeRank(r, _, n)
322250
) and
323-
forall(TypeFlowNode mid | Edge::edgeRank(r, mid, n) | T::supportsType(mid, t))
251+
forall(TypeFlowNode mid | E::edgeRank(r, mid, n) | T::supportsType(mid, t))
324252
}
325253

326254
/**
327255
* Holds if `t` is a candidate bound for `n` that is also valid for data
328256
* coming through all the incoming edges, and therefore is a valid bound for
329257
* `n`.
330258
*/
331-
predicate flowJoin(TypeFlowNode n, T::Typ t) { flowJoin(Edge::lastRank(n), n, t) }
259+
predicate flowJoin(Node n, T::Typ t) { flowJoin(E::lastRank(n), n, t) }
260+
}
261+
262+
private module JoinStep implements Edge {
263+
class Node = TypeFlowNode;
264+
265+
predicate edge = joinStep/2;
266+
}
267+
268+
private module SccJoinStep implements Edge {
269+
class Node = TypeFlowScc;
270+
271+
predicate edge = sccJoinStep/2;
332272
}
333273

334-
module RankedJoinStep = RankEdge<joinStep/2>;
274+
private module RankedJoinStep = RankEdge<JoinStep>;
335275

336-
module RankedSccJoinStep = RankEdge<sccJoinStep/2>;
276+
private module RankedSccJoinStep = RankEdge<SccJoinStep>;
337277

338278
private predicate exactTypeBase(TypeFlowNode n, RefType t) {
339279
exists(ClassInstanceExpr e |
@@ -363,13 +303,13 @@ private predicate exactType(TypeFlowNode n, RefType t) {
363303
or
364304
// The following is an optimized version of
365305
// `forex(TypeFlowNode mid | joinStep(mid, n) | exactType(mid, t))`
366-
ForAll<RankedJoinStep, ExactTypePropagation>::flowJoin(n, t)
306+
ForAll<TypeFlowNode, RankedJoinStep, ExactTypePropagation>::flowJoin(n, t)
367307
or
368-
exists(TypeFlowNode scc |
308+
exists(TypeFlowScc scc |
369309
sccRepr(n, scc) and
370310
// Optimized version of
371311
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | exactType(mid, t))`
372-
ForAll<RankedSccJoinStep, ExactTypePropagation>::flowJoin(scc, t)
312+
ForAll<TypeFlowScc, RankedSccJoinStep, ExactTypePropagation>::flowJoin(scc, t)
373313
)
374314
}
375315

@@ -563,11 +503,11 @@ private predicate typeFlow(TypeFlowNode n, RefType t) {
563503
or
564504
exists(TypeFlowNode mid | typeFlow(mid, t) and step(mid, n))
565505
or
566-
ForAll<RankedJoinStep, TypeFlowPropagation>::flowJoin(n, t)
506+
ForAll<TypeFlowNode, RankedJoinStep, TypeFlowPropagation>::flowJoin(n, t)
567507
or
568-
exists(TypeFlowNode scc |
508+
exists(TypeFlowScc scc |
569509
sccRepr(n, scc) and
570-
ForAll<RankedSccJoinStep, TypeFlowPropagation>::flowJoin(scc, t)
510+
ForAll<TypeFlowScc, RankedSccJoinStep, TypeFlowPropagation>::flowJoin(scc, t)
571511
)
572512
}
573513

@@ -703,13 +643,13 @@ private predicate hasUnionTypeFlow(TypeFlowNode n) {
703643
(
704644
// Optimized version of
705645
// `forex(TypeFlowNode mid | joinStep(mid, n) | unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))`
706-
ForAll<RankedJoinStep, HasUnionTypePropagation>::flowJoin(n, _)
646+
ForAll<TypeFlowNode, RankedJoinStep, HasUnionTypePropagation>::flowJoin(n, _)
707647
or
708-
exists(TypeFlowNode scc |
648+
exists(TypeFlowScc scc |
709649
sccRepr(n, scc) and
710650
// Optimized version of
711651
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | unionTypeFlowBaseCand(mid, _, _) or hasUnionTypeFlow(mid))`
712-
ForAll<RankedSccJoinStep, HasUnionTypePropagation>::flowJoin(scc, _)
652+
ForAll<TypeFlowScc, RankedSccJoinStep, HasUnionTypePropagation>::flowJoin(scc, _)
713653
)
714654
or
715655
exists(TypeFlowNode mid | step(mid, n) and hasUnionTypeFlow(mid))

0 commit comments

Comments
 (0)