Skip to content

Commit cba7b98

Browse files
committed
Shared: Add DataFlow::DeduplicatePathGraph
1 parent f097168 commit cba7b98

File tree

1 file changed

+172
-0
lines changed

1 file changed

+172
-0
lines changed

shared/dataflow/codeql/dataflow/DataFlow.qll

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,4 +851,176 @@ module DataFlowMake<LocationSig Location, InputSig<Location> Lang> {
851851
}
852852
}
853853
}
854+
855+
/**
856+
* Generates a `PathGraph` in which equivalent path nodes are merged, in order to avoid duplicate paths.
857+
*/
858+
module DeduplicatePathGraph<PathNodeSig InputPathNode, PathGraphSig<InputPathNode> Graph> {
859+
// NOTE: there is a known limitation in that this module cannot see which nodes are sources or sinks.
860+
// This only matters in the rare case where a sink PathNode has a non-empty set of succesors, and there is a
861+
// non-sink PathNode with the same `(node, toString)` value and the same successors, but is transitively
862+
// reachable from a different set of PathNodes. (And conversely for sources).
863+
//
864+
/**
865+
* Gets a successor of `node`, taking `subpaths` into account.
866+
*/
867+
private InputPathNode getASuccessorLike(InputPathNode node) {
868+
Graph::edges(node, result)
869+
or
870+
Graph::subpaths(node, _, _, result) // arg -> out
871+
//
872+
// Note that there is no case for `arg -> param` or `ret -> out` for subpaths.
873+
// It is OK to collapse nodes inside a subpath while calls to that subpaths aren't collapsed and vice versa.
874+
}
875+
876+
private InputPathNode getAPredecessorLike(InputPathNode node) {
877+
node = getASuccessorLike(result)
878+
}
879+
880+
pragma[nomagic]
881+
private InputPathNode getAPathNode(Node node, string toString) {
882+
result.getNode() = node and
883+
Graph::nodes(result, _, toString)
884+
}
885+
886+
private signature predicate collapseCandidateSig(Node node, string toString);
887+
888+
private signature InputPathNode stepSig(InputPathNode node);
889+
890+
/**
891+
* Performs a forward or backward pass computing which `(node, toString)` pairs can subsume their corresponding
892+
* path nodes.
893+
*
894+
* This is similar to automaton minimization, but for an NFA. Since minimizing an NFA is NP-hard (and does not have
895+
* a unique minimal NFA), we operate with the simpler model: for a given `(node, toString)` pair, either all
896+
* corresponding path nodes are merged, or none are merged.
897+
*
898+
* Comments are written as if this checks for outgoing edges and propagates backward, though the module is also
899+
* used to perform the opposite direction.
900+
*/
901+
private module MakeDiscriminatorPass<collapseCandidateSig/2 collapseCandidate, stepSig/1 step> {
902+
/**
903+
* Gets the number of `(node, toString)` pairs reachable in one step from `pathNode`.
904+
*/
905+
private int getOutDegreeFromPathNode(InputPathNode pathNode) {
906+
result = count(Node node, string toString | step(pathNode) = getAPathNode(node, toString))
907+
}
908+
909+
/**
910+
* Gets the number of `(node2, toString2)` pairs reachable in one step from path nodes corresponding to `(node, toString)`.
911+
*/
912+
private int getOutDegreeFromNode(Node node, string toString) {
913+
result =
914+
strictcount(Node node2, string toString2 |
915+
step(getAPathNode(node, toString)) = getAPathNode(node2, toString2)
916+
)
917+
}
918+
919+
/** Holds if `(node, toString)` cannot be collapsed (but was a candidate for being collapsed). */
920+
predicate discriminatedPair(Node node, string toString) {
921+
collapseCandidate(node, toString) and
922+
(
923+
// Check if all corresponding PathNodes have the same successor sets when projected to `(node, toString)`.
924+
// To do this, we check that each successor set has the same size as the union of the succesor sets.
925+
// - If the successor sets are equal, then they are also equal to their union, and so have the correct size.
926+
// - Conversely, if two successor sets are not equal, one of them must be missing an element that is present
927+
// in the union, but must still be a subset of the union, and thus be strictly smaller than the union.
928+
getOutDegreeFromPathNode(getAPathNode(node, toString)) <
929+
getOutDegreeFromNode(node, toString)
930+
or
931+
// Retain flow state if one of the successors requires it to be retained
932+
discriminatedPathNode(step(getAPathNode(node, toString)))
933+
)
934+
}
935+
936+
/** Holds if `pathNode` cannot be collapsed. */
937+
predicate discriminatedPathNode(InputPathNode pathNode) {
938+
exists(Node node, string toString |
939+
discriminatedPair(node, toString) and
940+
getAPathNode(node, toString) = pathNode
941+
)
942+
}
943+
}
944+
945+
private predicate initialCandidate(Node node, string toString) {
946+
exists(getAPathNode(node, toString))
947+
}
948+
949+
private module Pass1 = MakeDiscriminatorPass<initialCandidate/2, getASuccessorLike/1>;
950+
951+
private module Pass2 = MakeDiscriminatorPass<Pass1::discriminatedPair/2, getAPredecessorLike/1>;
952+
953+
private newtype TPathNode =
954+
TPreservedPathNode(InputPathNode node) { Pass2::discriminatedPathNode(node) } or
955+
TCollapsedPathNode(Node node, string toString) {
956+
initialCandidate(node, toString) and
957+
not Pass2::discriminatedPair(node, toString)
958+
}
959+
960+
/** A node in the path graph after equivalent nodes have been collapsed. */
961+
class PathNode extends TPathNode {
962+
private Node asCollapsedNode() { this = TCollapsedPathNode(result, _) }
963+
964+
private InputPathNode asPreservedNode() { this = TPreservedPathNode(result) }
965+
966+
/** Gets a correspondng node in the original graph. */
967+
InputPathNode getAnOriginalPathNode() {
968+
exists(Node node, string toString |
969+
this = TCollapsedPathNode(node, toString) and
970+
result = getAPathNode(node, toString)
971+
)
972+
or
973+
result = this.asPreservedNode()
974+
}
975+
976+
/** Gets a string representation of this node. */
977+
string toString() {
978+
result = this.asPreservedNode().toString() or this = TCollapsedPathNode(_, result)
979+
}
980+
981+
/**
982+
* Holds if this element is at the specified location.
983+
* The location spans column `startcolumn` of line `startline` to
984+
* column `endcolumn` of line `endline` in file `filepath`.
985+
* For more information, see
986+
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
987+
*/
988+
predicate hasLocationInfo(
989+
string filepath, int startline, int startcolumn, int endline, int endcolumn
990+
) {
991+
this.getAnOriginalPathNode()
992+
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
993+
}
994+
995+
/** Gets the corresponding data-flow node. */
996+
Node getNode() {
997+
result = this.asCollapsedNode()
998+
or
999+
result = this.asPreservedNode().getNode()
1000+
}
1001+
}
1002+
1003+
/**
1004+
* Provides the query predicates needed to include a graph in a path-problem query.
1005+
*/
1006+
module PathGraph implements PathGraphSig<PathNode> {
1007+
query predicate nodes(PathNode node, string key, string val) {
1008+
Graph::nodes(node.getAnOriginalPathNode(), key, val)
1009+
}
1010+
1011+
query predicate edges(PathNode node1, PathNode node2) {
1012+
Graph::edges(node1.getAnOriginalPathNode(), node2.getAnOriginalPathNode())
1013+
}
1014+
1015+
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
1016+
// Note: this may look suspiciously simple, but it's not an oversight. Even if the caller needs to retain state,
1017+
// it is entirely possible to step through a subpath in which state has been projected away.
1018+
Graph::subpaths(arg.getAnOriginalPathNode(), par.getAnOriginalPathNode(),
1019+
ret.getAnOriginalPathNode(), out.getAnOriginalPathNode())
1020+
}
1021+
}
1022+
1023+
// Re-export the PathGraph so the user can import a single module and get both PathNode and the query predicates
1024+
import PathGraph
1025+
}
8541026
}

0 commit comments

Comments
 (0)