@@ -2,6 +2,7 @@ private import codeql_ql.ast.Ast
2
2
private import internal.NodesInternal
3
3
private import internal.DataFlowNumbering
4
4
private import internal.LocalFlow as LocalFlow
5
+ private import internal.GlobalFlow as GlobalFlow
5
6
6
7
/**
7
8
* An expression or variable in a formula, including some additional nodes
@@ -10,6 +11,8 @@ private import internal.LocalFlow as LocalFlow
10
11
* Nodes that are locally bound together by equalities are clustered into a "super node",
11
12
* which can be accessed using `getSuperNode()`. There is usually no reason to use `Node` directly
12
13
* other than to reason about what kind of node is contained in a super node.
14
+ *
15
+ * To reason about global data flow, use `SuperNode.track()`.
13
16
*/
14
17
class Node extends TNode {
15
18
string toString ( ) { none ( ) } // overridden in subclasses
@@ -31,6 +34,8 @@ class Node extends TNode {
31
34
/**
32
35
* Gets the collection of data-flow nodes locally bound by equalities, represented
33
36
* by a "super node".
37
+ *
38
+ * Super nodes are the medium through which to propagate data-flow information globally.
34
39
*/
35
40
SuperNode getSuperNode ( ) { result .getANode ( ) = this }
36
41
}
@@ -224,6 +229,8 @@ Node fieldNode(Predicate pred, FieldDecl fieldDecl) {
224
229
225
230
/**
226
231
* A collection of data-flow nodes in the same predicate, locally bound by equalities.
232
+ *
233
+ * To reason about global data flow, use `SuperNode.track()`.
227
234
*/
228
235
class SuperNode extends LocalFlow:: TSuperNode {
229
236
private int repr ;
@@ -282,10 +289,154 @@ class SuperNode extends LocalFlow::TSuperNode {
282
289
result = this .getALocalMemberCall ( ) and
283
290
result .getMemberName ( ) = name
284
291
}
292
+
293
+ /**
294
+ * Gets a node that this node may "flow to" after one step, where `t1` summarizes the path that reached this node and `t2` summarizes the path including the step.
295
+ *
296
+ * Basic usage of `track()` to track some expressions looks like this:
297
+ * ```
298
+ * DataFlow::SuperNode myThing(DataFlow::Tracker t) {
299
+ * t.start() and
300
+ * result = DataFlow::superNode(< some ast node >)
301
+ * or
302
+ * exists (DataFlow::Tracker t2 |
303
+ * result = myThing(t2).track(t2, t)
304
+ * )
305
+ * }
306
+ *
307
+ * DataFlow::SuperNode myThing() { result = myThing(DataFlow::Tracker::end()) }
308
+ * ```
309
+ */
310
+ pragma [ inline]
311
+ SuperNode track ( Tracker t1 , Tracker t2 ) {
312
+ // Return state (no call step used so far, i.e. `not t1.hasCall()`) -> return state
313
+ // The edge that was followed is stored in `t2`
314
+ not t1 .hasCall ( ) and
315
+ GlobalFlow:: directedEdgeSuper ( result , this , t2 )
316
+ or
317
+ // Call state or initial state -> call state (the edge label is ignored)
318
+ t1 .hasCallOrIsStart ( ) and
319
+ t2 .hasCall ( ) and
320
+ GlobalFlow:: directedEdgeSuper ( this , result , _)
321
+ or
322
+ // Return state -> call state
323
+ // The last-used return edge (held in `t1`) must not be used as the initial call edge
324
+ // (doing so would allow returning out of a disjunction and into another branch of that disjunction)
325
+ not t1 .hasCall ( ) and
326
+ t2 .hasCall ( ) and
327
+ exists ( GlobalFlow:: EdgeLabel edge |
328
+ GlobalFlow:: directedEdgeSuper ( this , result , edge ) and
329
+ edge != t1
330
+ )
331
+ }
332
+
333
+ /**
334
+ * Gets a string constant that flows to this node along a path summarized by `t`.
335
+ */
336
+ cached
337
+ private string getAStringValue ( Tracker t ) {
338
+ t .start ( ) and
339
+ result = this .asAstNode ( ) .( String ) .getValue ( )
340
+ or
341
+ exists ( SuperNode pred , Tracker t2 |
342
+ this = pred .track ( t2 , t ) and
343
+ result = pred .getAStringValue ( t2 )
344
+ )
345
+ or
346
+ // Step through calls to a few built-ins that don't cause a blow-up (the tracker `t` is left unchanged)
347
+ exists ( SuperNode pred , string methodName , string oldValue |
348
+ this .asAstNode ( ) = pred .getALocalMemberCall ( methodName ) and
349
+ oldValue = pred .getAStringValue ( t )
350
+ |
351
+ methodName = "toLowerCase" and
352
+ result = oldValue .toLowerCase ( )
353
+ or
354
+ methodName = "toUpperCase" and
355
+ result = oldValue .toUpperCase ( )
356
+ )
357
+ }
358
+
359
+ /** Gets a string constant that may flow here (possibly from a caller context); any final tracker state is accepted via `Tracker::end()`. */
360
+ pragma [ inline]
361
+ string getAStringValue ( ) { result = this .getAStringValue ( Tracker:: end ( ) ) }
362
+
363
+ /** Gets a string constant that may flow here, possibly out of callees, but not from caller contexts (the final tracker must satisfy `Tracker::endNoCall()`). */
364
+ pragma [ inline]
365
+ string getAStringValueNoCall ( ) { result = this .getAStringValue ( Tracker:: endNoCall ( ) ) }
366
+
367
+ /**
368
+ * Gets a string constant that may flow here, which can safely be combined with another
369
+ * value that was tracked here with `otherT` (as decided by `Tracker.isSafeToCombineWith`).
370
+ *
371
+ * This is under-approximate and will fail to accept valid matches when both values
372
+ * came in from the same chain of calls. `otherT` must be bound by the caller (see the `bindingset`).
373
+ */
374
+ bindingset [ otherT]
375
+ string getAStringValueForContext ( Tracker otherT ) {
376
+ exists ( Tracker stringT |
377
+ result = this .getAStringValue ( stringT ) and
378
+ otherT .isSafeToCombineWith ( stringT )
379
+ )
380
+ }
285
381
}
286
382
287
383
/** Gets the super node for the given AST node, that is, the super node of `astNode(node)`. */
288
384
pragma [ inline]
289
385
SuperNode superNode ( AstNode node ) {
290
386
result = astNode ( node ) .getSuperNode ( )
291
387
}
388
+
389
+ /**
390
+ * A summary of the steps needed to reach a node in the global data flow graph,
391
+ * to be used in combination with `SuperNode.track`.
392
+ */
393
+ class Tracker extends GlobalFlow:: TEdgeLabelOrTrackerState {
394
+ /** Holds if this is the starting point, that is, the summary of the empty path. */
395
+ predicate start ( ) { this = GlobalFlow:: MkNoEdge ( ) }
396
+
397
+ /** Holds if a call step has been used (possibly preceded by return steps). */
398
+ predicate hasCall ( ) { this = GlobalFlow:: MkHasCall ( ) }
399
+
400
+ /** Holds if either `start()` or `hasCall()` holds. */
401
+ predicate hasCallOrIsStart ( ) { this .start ( ) or this .hasCall ( ) }
402
+
403
+ /**
404
+ * Holds if the two trackers are safe to combine, in the sense that
405
+ * they don't make contradictory assumptions about what context they're in.
406
+ *
407
+ * This is approximate and will reject any pair of trackers that have
408
+ * both used a call or locally came from the same disjunction.
409
+ */
410
+ pragma [ inline]
411
+ predicate isSafeToCombineWith ( Tracker other ) {
412
+ not (
413
+ // Both values came from a call, they could come from different call sites.
414
+ this .hasCall ( ) and
415
+ other .hasCall ( )
416
+ or
417
+ // Both values came from the same disjunction, they could come from different branches.
418
+ this = other and
419
+ this instanceof GlobalFlow:: MkDisjunction
420
+ )
421
+ }
422
+
423
+ /** Gets a string representation of this element. */
424
+ string toString ( ) {
425
+ this instanceof GlobalFlow:: MkNoEdge and
426
+ result = "Tracker in initial state"
427
+ or
428
+ this instanceof GlobalFlow:: MkHasCall and
429
+ result = "Tracker with calls"
430
+ or
431
+ this instanceof GlobalFlow:: EdgeLabel and
432
+ result = "Tracker with return step out of " + this .( GlobalFlow:: EdgeLabel ) .toString ( )
433
+ }
434
+ }
435
+
436
+ module Tracker {
437
+ /** Gets a valid end-point for tracking; every tracker state is accepted. */
438
+ Tracker end ( ) { any ( ) }
439
+
440
+ /** Gets a valid end-point for tracking where no calls were used, that is, any tracker for which `hasCall()` does not hold. */
441
+ Tracker endNoCall ( ) { not result .hasCall ( ) }
442
+ }
0 commit comments