Skip to content

Commit 4b74fa6

Browse files
committed
QL: Add global flow and type-tracking
1 parent 0ffb558 commit 4b74fa6

File tree

2 files changed

+393
-0
lines changed

2 files changed

+393
-0
lines changed

ql/ql/src/codeql_ql/dataflow/DataFlow.qll

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ private import codeql_ql.ast.Ast
22
private import internal.NodesInternal
33
private import internal.DataFlowNumbering
44
private import internal.LocalFlow as LocalFlow
5+
private import internal.GlobalFlow as GlobalFlow
56

67
/**
78
* An expression or variable in a formula, including some additional nodes
@@ -10,6 +11,8 @@ private import internal.LocalFlow as LocalFlow
1011
* Nodes that are locally bound together by equalities are clustered into a "super node",
1112
* which can be accessed using `getSuperNode()`. There is usually no reason to use `Node` directly
1213
* other than to reason about what kind of node is contained in a super node.
14+
*
15+
* To reason about global data flow, use `SuperNode.track()`.
1316
*/
1417
class Node extends TNode {
1518
string toString() { none() } // overridden in subclasses
@@ -31,6 +34,8 @@ class Node extends TNode {
3134
/**
3235
* Gets the collection of data-flow nodes locally bound by equalities, represented
3336
* by a "super node".
37+
*
38+
* Super nodes are the medium through which to propagate data-flow information globally.
3439
*/
3540
SuperNode getSuperNode() { result.getANode() = this }
3641
}
@@ -224,6 +229,8 @@ Node fieldNode(Predicate pred, FieldDecl fieldDecl) {
224229

225230
/**
226231
* A collection of data-flow nodes in the same predicate, locally bound by equalities.
232+
*
233+
* To reason about global data flow, use `SuperNode.track()`.
227234
*/
228235
class SuperNode extends LocalFlow::TSuperNode {
229236
private int repr;
@@ -282,10 +289,154 @@ class SuperNode extends LocalFlow::TSuperNode {
282289
result = this.getALocalMemberCall() and
283290
result.getMemberName() = name
284291
}
292+
293+
/**
294+
* Gets a node that this node may "flow to" after one step, where `t1` is the tracker state at this node and `t2` is the tracker state at the resulting node.
295+
*
296+
* Basic usage of `track()` to track some expressions looks like this:
297+
* ```
298+
* DataFlow::SuperNode myThing(DataFlow::Tracker t) {
299+
* t.start() and
300+
* result = DataFlow::superNode(< some ast node >)
301+
* or
302+
* exists (DataFlow::Tracker t2 |
303+
* result = myThing(t2).track(t2, t)
304+
* )
305+
* }
306+
*
307+
* DataFlow::SuperNode myThing() { result = myThing(DataFlow::Tracker::end()) }
308+
* ```
309+
*/
310+
pragma[inline]
311+
SuperNode track(Tracker t1, Tracker t2) {
312+
// Return state -> return state: `t1` must not have used a call step, and `result`/`this` are swapped relative to the call steps below
313+
// Store the return edge in t2 (the label of the traversed edge becomes the new tracker state)
314+
not t1.hasCall() and
315+
GlobalFlow::directedEdgeSuper(result, this, t2)
316+
or
317+
// Call state or initial state -> call state (any edge label is accepted)
318+
t1.hasCallOrIsStart() and
319+
t2.hasCall() and
320+
GlobalFlow::directedEdgeSuper(this, result, _)
321+
or
322+
// Return state -> call state
323+
// The last-used return edge (held in `t1`) must not be used as the initial call edge
324+
// (doing so would allow returning out of a disjunction and into another branch of that disjunction)
325+
not t1.hasCall() and
326+
t2.hasCall() and
327+
exists(GlobalFlow::EdgeLabel edge |
328+
GlobalFlow::directedEdgeSuper(this, result, edge) and
329+
edge != t1
330+
)
331+
}
332+
333+
/**
334+
* Gets a string constant that may flow to this node, where `t` summarizes the steps taken: a string literal at this node (initial state), a value tracked here through `track()`, or the result of `toLowerCase`/`toUpperCase` applied to such a value.
335+
*/
336+
cached
337+
private string getAStringValue(Tracker t) {
338+
t.start() and
339+
result = this.asAstNode().(String).getValue()
340+
or
341+
exists(SuperNode pred, Tracker t2 |
342+
this = pred.track(t2, t) and
343+
result = pred.getAStringValue(t2)
344+
)
345+
or
346+
// Step through calls to a few built-ins that don't cause a blow-up (the tracker state `t` is left unchanged by these steps)
347+
exists(SuperNode pred, string methodName, string oldValue |
348+
this.asAstNode() = pred.getALocalMemberCall(methodName) and
349+
oldValue = pred.getAStringValue(t)
350+
|
351+
methodName = "toLowerCase" and
352+
result = oldValue.toLowerCase()
353+
or
354+
methodName = "toUpperCase" and
355+
result = oldValue.toUpperCase()
356+
)
357+
}
358+
359+
/** Gets a string constant that may flow here (possibly from a caller context), accepting every tracker state given by `Tracker::end()`. */
360+
pragma[inline]
361+
string getAStringValue() { result = this.getAStringValue(Tracker::end()) }
362+
363+
/** Gets a string constant that may flow here, possibly out of callees, but not from caller contexts (only tracker states given by `Tracker::endNoCall()` are accepted). */
364+
pragma[inline]
365+
string getAStringValueNoCall() { result = this.getAStringValue(Tracker::endNoCall()) }
366+
367+
/**
368+
* Gets a string constant that may flow here, which can safely be combined with another
369+
* value that was tracked here with `otherT`, as decided by `Tracker.isSafeToCombineWith`.
370+
*
371+
* This is under-approximate and will fail to accept valid matches when both values
372+
* came in from the same chain of calls.
373+
*/
374+
bindingset[otherT]
375+
string getAStringValueForContext(Tracker otherT) {
376+
exists(Tracker stringT |
377+
result = this.getAStringValue(stringT) and
378+
otherT.isSafeToCombineWith(stringT)
379+
)
380+
}
285381
}
286382

287383
/** Gets the super node for the given AST node, that is, the super node containing `astNode(node)`. */
288384
pragma[inline]
289385
SuperNode superNode(AstNode node) {
290386
result = astNode(node).getSuperNode()
291387
}
388+
389+
/**
390+
* A summary of the steps needed to reach a node in the global data flow graph,
391+
* to be used in combination with `SuperNode.track`.
392+
*/
393+
class Tracker extends GlobalFlow::TEdgeLabelOrTrackerState {
394+
/** Holds if this is the starting point, that is, the summary of the empty path. */
395+
predicate start() { this = GlobalFlow::MkNoEdge() }
396+
397+
/** Holds if a call step has been used (possibly preceded by return steps). */
398+
predicate hasCall() { this = GlobalFlow::MkHasCall() }
399+
400+
/** Holds if either `start()` or `hasCall()` holds. */
401+
predicate hasCallOrIsStart() { this.start() or this.hasCall() }
402+
403+
/**
404+
* Holds if the two trackers are safe to combine, in the sense that
405+
* they don't make contradictory assumptions about what context they're in.
406+
*
407+
* This is approximate and will reject any pair of trackers that have
408+
* both used a call or locally came from the same disjunction.
409+
*/
410+
pragma[inline]
411+
predicate isSafeToCombineWith(Tracker other) {
412+
not (
413+
// Both values came from a call, they could come from different call sites.
414+
this.hasCall() and
415+
other.hasCall()
416+
or
417+
// Both values came from the same disjunction, they could come from different branches.
418+
this = other and
419+
this instanceof GlobalFlow::MkDisjunction
420+
)
421+
}
422+
423+
/** Gets a string representation of this tracker: the initial state, the call state, or a return step out of an edge label. */
424+
string toString() {
425+
this instanceof GlobalFlow::MkNoEdge and
426+
result = "Tracker in initial state"
427+
or
428+
this instanceof GlobalFlow::MkHasCall and
429+
result = "Tracker with calls"
430+
or
431+
this instanceof GlobalFlow::EdgeLabel and
432+
result = "Tracker with return step out of " + this.(GlobalFlow::EdgeLabel).toString()
433+
}
434+
}
435+
436+
module Tracker {
437+
/** Gets a valid end-point for tracking; every tracker state is accepted. */
438+
Tracker end() { any() }
439+
440+
/** Gets a valid end-point for tracking where no calls were used, that is, any tracker state for which `hasCall()` does not hold. */
441+
Tracker endNoCall() { not result.hasCall() }
442+
}

0 commit comments

Comments
 (0)