Skip to content

Commit e745df6

Browse files
committed
Python: module for import time flow
The logic for separating local flow into _import time_ and _runtime_ was duplicated a few times. Create a module for it instead, and add a good qldoc.
1 parent 9cae488 commit e745df6

File tree

2 files changed

+60
-49
lines changed

2 files changed

+60
-49
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 59 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,60 @@ module EssaFlow {
381381
//--------
382382
// Local flow
383383
//--------
384+
signature predicate stepSig(Node nodeFrom, Node nodeTo);
385+
386+
/**
387+
* A module to separate import-time from run-time.
388+
*
389+
* We really have two local flow relations, on for module initialisation time (or _import time_) and one for runtime.
390+
* Consider a read from a global variable `x = foo`. At import time there should be a local flow step from `foo` to `x`,
391+
* while at runtime there should be a jump step from the module variable corresponding to `foo` to `x`.
392+
*
393+
* Similarly for, a write `foo = y`, at import time, there is a local flow step from `y` to `foo` while at runtime there
394+
* is a jump step from `y` to the module variable corresponding to `foo`.
395+
*
396+
* We need a way of distinguishing if we are looking at import time or runtime. We have the following helpful facts:
397+
* - All top-level executable statements are import time (and import time only)
398+
* - All -non-top-level code may be executed at runtime (but could also be executed at import time)
399+
*
400+
* We could write an analysis to determine which functions are called at import time, but until we have that, we will go
401+
* with the heuristic that global variables act according to import time rules at top-level program points and according
402+
* to runtime rules everywhere else. This will forego some import time local flow but otherwise be consistent.
403+
*/
404+
module Separate<stepSig/2 rawStep> {
405+
/**
406+
* Holds if `node` is found at the top level of a module.
407+
*/
408+
pragma[inline]
409+
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
410+
411+
/** Holds if a step can be taken from `nodeFrom` to `nodeTo` at import time. */
412+
predicate importTimeStep(Node nodeFrom, Node nodeTo) {
413+
// As a proxy for whether statements can be executed at import time,
414+
// we check if they appear at the top level.
415+
// This will miss statements inside functions called from the top level.
416+
isTopLevel(nodeFrom) and
417+
isTopLevel(nodeTo) and
418+
rawStep(nodeFrom, nodeTo)
419+
}
420+
421+
/** Holds if a step can be taken from `nodeFrom` to `nodeTo` at runtime. */
422+
predicate runtimeStep(Node nodeFrom, Node nodeTo) {
423+
// Anything not at the top level can be executed at runtime.
424+
not isTopLevel(nodeFrom) and
425+
not isTopLevel(nodeTo) and
426+
rawStep(nodeFrom, nodeTo)
427+
}
428+
429+
/**
430+
* Holds if a step can be taken from `nodeFrom` to `nodeTo`.
431+
*/
432+
predicate step(Node nodeFrom, Node nodeTo) {
433+
importTimeStep(nodeFrom, nodeTo) or
434+
runtimeStep(nodeFrom, nodeTo)
435+
}
436+
}
437+
384438
/**
385439
* This is the local flow predicate that is used as a building block in global
386440
* data flow.
@@ -405,67 +459,24 @@ predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
405459
// both out of `node` and any post-update node of `node`.
406460
exists(Node node |
407461
nodeFrom = update(node) and
408-
(
409-
importTimeLocalFlowStep(node, nodeTo) or
410-
runtimeLocalFlowStep(node, nodeTo)
411-
)
462+
Separate<EssaFlow::essaFlowStep/2>::step(node, nodeTo)
412463
)
413464
}
414465

415-
/**
416-
* Holds if `node` is found at the top level of a module.
417-
*/
418-
pragma[inline]
419-
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
420-
421-
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
422-
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
423-
// As a proxy for whether statements can be executed at import time,
424-
// we check if they appear at the top level.
425-
// This will miss statements inside functions called from the top level.
426-
isTopLevel(nodeFrom) and
427-
isTopLevel(nodeTo) and
428-
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
429-
}
430-
431-
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
432-
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
433-
// Anything not at the top level can be executed at runtime.
434-
not isTopLevel(nodeFrom) and
435-
not isTopLevel(nodeTo) and
436-
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
466+
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {
467+
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
468+
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
437469
}
438470

439471
predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
440472
// If there is local flow out of a node `node`, we want flow
441473
// both out of `node` and any post-update node of `node`.
442474
exists(Node node |
443475
nodeFrom = update(node) and
444-
(
445-
importTimeSummaryFlowStep(node, nodeTo) or
446-
runtimeSummaryFlowStep(node, nodeTo)
447-
)
476+
Separate<summaryLocalStep/2>::step(node, nodeTo)
448477
)
449478
}
450479

451-
predicate importTimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
452-
// As a proxy for whether statements can be executed at import time,
453-
// we check if they appear at the top level.
454-
// This will miss statements inside functions called from the top level.
455-
isTopLevel(nodeFrom) and
456-
isTopLevel(nodeTo) and
457-
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
458-
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
459-
}
460-
461-
predicate runtimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
462-
// Anything not at the top level can be executed at runtime.
463-
not isTopLevel(nodeFrom) and
464-
not isTopLevel(nodeTo) and
465-
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
466-
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
467-
}
468-
469480
/** `ModuleVariable`s are accessed via jump steps at runtime. */
470481
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
471482
// Module variable read

python/ql/test/experimental/dataflow/module-initialization/localFlow.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module ImportTimeLocalFlowTest implements FlowTestSig {
1212
// results are displayed next to `nodeTo`, so we need a line to write on
1313
nodeTo.getLocation().getStartLine() > 0 and
1414
nodeTo.asVar() instanceof GlobalSsaVariable and
15-
DP::importTimeLocalFlowStep(nodeFrom, nodeTo)
15+
DP::Separate<DP::EssaFlow::essaFlowStep/2>::importTimeStep(nodeFrom, nodeTo)
1616
}
1717
}
1818

0 commit comments

Comments
 (0)