Skip to content

Commit c26c68c

Browse files
authored
Merge pull request #14617 from yoff/python/module-for-import-time-flow
Python: module for import time flow
2 parents 6d859da + f7a8a8a commit c26c68c

File tree

2 files changed

+85
-67
lines changed

2 files changed

+85
-67
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 84 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,86 @@ module EssaFlow {
381381
//--------
382382
// Local flow
383383
//--------
384+
/** A module for transforming step relations. */
385+
module StepRelationTransformations {
386+
/**
387+
* Holds if there is a step from `nodeFrom` to `nodeTo` in
388+
* the step relation to be transformed.
389+
*
390+
* This is the input relation to the transformations.
391+
*/
392+
signature predicate stepSig(Node nodeFrom, Node nodeTo);
393+
394+
/**
395+
* A module to separate import-time from run-time.
396+
*
397+
* We really have two local flow relations, one for module initialisation time (or _import time_) and one for runtime.
398+
* Consider a read from a global variable `x = foo`. At import time there should be a local flow step from `foo` to `x`,
399+
* while at runtime there should be a jump step from the module variable corresponding to `foo` to `x`.
400+
*
401+
* Similarly, for a write `foo = y`, at import time, there is a local flow step from `y` to `foo` while at runtime there
402+
* is a jump step from `y` to the module variable corresponding to `foo`.
403+
*
404+
* We need a way of distinguishing if we are looking at import time or runtime. We have the following helpful facts:
405+
* - All top-level executable statements are import time (and import time only)
406+
* - All non-top-level code may be executed at runtime (but could also be executed at import time)
407+
*
408+
* We could write an analysis to determine which functions are called at import time, but until we have that, we will go
409+
* with the heuristic that global variables act according to import time rules at top-level program points and according
410+
* to runtime rules everywhere else. This will forego some import time local flow but otherwise be consistent.
411+
*/
412+
module PhaseDependentFlow<stepSig/2 rawStep> {
413+
/**
414+
* Holds if `node` is found at the top level of a module.
415+
*/
416+
pragma[inline]
417+
private predicate isTopLevel(Node node) { node.getScope() instanceof Module }
418+
419+
/** Holds if a step can be taken from `nodeFrom` to `nodeTo` at import time. */
420+
predicate importTimeStep(Node nodeFrom, Node nodeTo) {
421+
// As a proxy for whether statements can be executed at import time,
422+
// we check if they appear at the top level.
423+
// This will miss statements inside functions called from the top level.
424+
isTopLevel(nodeFrom) and
425+
isTopLevel(nodeTo) and
426+
rawStep(nodeFrom, nodeTo)
427+
}
428+
429+
/** Holds if a step can be taken from `nodeFrom` to `nodeTo` at runtime. */
430+
predicate runtimeStep(Node nodeFrom, Node nodeTo) {
431+
// Anything not at the top level can be executed at runtime.
432+
not isTopLevel(nodeFrom) and
433+
not isTopLevel(nodeTo) and
434+
rawStep(nodeFrom, nodeTo)
435+
}
436+
437+
/**
438+
* Holds if a step can be taken from `nodeFrom` to `nodeTo`.
439+
*/
440+
predicate step(Node nodeFrom, Node nodeTo) {
441+
importTimeStep(nodeFrom, nodeTo) or
442+
runtimeStep(nodeFrom, nodeTo)
443+
}
444+
}
445+
446+
/**
447+
* A module to add steps from post-update nodes.
448+
* Whenever there is a step from `x` to `y`,
449+
* we add a step from `[post] x` to `y`.
450+
*/
451+
module IncludePostUpdateFlow<stepSig/2 rawStep> {
452+
predicate step(Node nodeFrom, Node nodeTo) {
453+
// We either have a raw step from `nodeFrom`...
454+
rawStep(nodeFrom, nodeTo)
455+
or
456+
// ...or we have a raw step from a pre-update node of `nodeFrom`
457+
rawStep(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
458+
}
459+
}
460+
}
461+
462+
import StepRelationTransformations
463+
384464
/**
385465
* This is the local flow predicate that is used as a building block in global
386466
* data flow.
@@ -401,69 +481,16 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
401481
* or at runtime when callables in the module are called.
402482
*/
403483
predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
404-
// If there is local flow out of a node `node`, we want flow
405-
// both out of `node` and any post-update node of `node`.
406-
exists(Node node |
407-
nodeFrom = update(node) and
408-
(
409-
importTimeLocalFlowStep(node, nodeTo) or
410-
runtimeLocalFlowStep(node, nodeTo)
411-
)
412-
)
413-
}
414-
415-
/**
416-
* Holds if `node` is found at the top level of a module.
417-
*/
418-
pragma[inline]
419-
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
420-
421-
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
422-
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
423-
// As a proxy for whether statements can be executed at import time,
424-
// we check if they appear at the top level.
425-
// This will miss statements inside functions called from the top level.
426-
isTopLevel(nodeFrom) and
427-
isTopLevel(nodeTo) and
428-
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
429-
}
430-
431-
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
432-
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
433-
// Anything not at the top level can be executed at runtime.
434-
not isTopLevel(nodeFrom) and
435-
not isTopLevel(nodeTo) and
436-
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
484+
IncludePostUpdateFlow<PhaseDependentFlow<EssaFlow::essaFlowStep/2>::step/2>::step(nodeFrom, nodeTo)
437485
}
438486

439-
predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
440-
// If there is local flow out of a node `node`, we want flow
441-
// both out of `node` and any post-update node of `node`.
442-
exists(Node node |
443-
nodeFrom = update(node) and
444-
(
445-
importTimeSummaryFlowStep(node, nodeTo) or
446-
runtimeSummaryFlowStep(node, nodeTo)
447-
)
448-
)
449-
}
450-
451-
predicate importTimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
452-
// As a proxy for whether statements can be executed at import time,
453-
// we check if they appear at the top level.
454-
// This will miss statements inside functions called from the top level.
455-
isTopLevel(nodeFrom) and
456-
isTopLevel(nodeTo) and
487+
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {
457488
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
458489
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
459490
}
460491

461-
predicate runtimeSummaryFlowStep(Node nodeFrom, Node nodeTo) {
462-
// Anything not at the top level can be executed at runtime.
463-
not isTopLevel(nodeFrom) and
464-
not isTopLevel(nodeTo) and
465-
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.(FlowSummaryNode).getSummaryNode(),
466-
nodeTo.(FlowSummaryNode).getSummaryNode(), true)
492+
predicate summaryFlowSteps(Node nodeFrom, Node nodeTo) {
493+
IncludePostUpdateFlow<PhaseDependentFlow<summaryLocalStep/2>::step/2>::step(nodeFrom, nodeTo)
467494
}
468495

469496
/** `ModuleVariable`s are accessed via jump steps at runtime. */
@@ -487,15 +514,6 @@ predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
487514
)
488515
}
489516

490-
/**
491-
* Holds if `result` is either `node`, or the post-update node for `node`.
492-
*/
493-
private Node update(Node node) {
494-
result = node
495-
or
496-
result.(PostUpdateNode).getPreUpdateNode() = node
497-
}
498-
499517
//--------
500518
// Type pruning
501519
//--------

python/ql/test/experimental/dataflow/module-initialization/localFlow.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module ImportTimeLocalFlowTest implements FlowTestSig {
1212
// results are displayed next to `nodeTo`, so we need a line to write on
1313
nodeTo.getLocation().getStartLine() > 0 and
1414
nodeTo.asVar() instanceof GlobalSsaVariable and
15-
DP::importTimeLocalFlowStep(nodeFrom, nodeTo)
15+
DP::PhaseDependentFlow<DP::EssaFlow::essaFlowStep/2>::importTimeStep(nodeFrom, nodeTo)
1616
}
1717
}
1818

0 commit comments

Comments
 (0)