Skip to content

Commit a09078a

Browse files
authored
Merge pull request github#14777 from yoff/python/remove-ssa-nodes-from-dataflow-graph
Python: remove EssaNodes
2 parents 33a0de0 + 9e1c818 commit a09078a

File tree

85 files changed

+1936
-1980
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+1936
-1980
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: fix
3+
---
4+
5+
- The dataflow graph no longer contains SSA variables. Instead, flow is directed via the corresponding controlflow nodes. This should make the graph and the flow simpler to understand. Minor improvements in flow computation has been observed, but in general negligible changes to alerts are expected.

python/ql/lib/semmle/python/Flow.qll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,10 @@ class ControlFlowNode extends @py_flow_node {
126126
cached
127127
string toString() {
128128
Stages::AST::ref() and
129-
exists(Scope s | s.getEntryNode() = this | result = "Entry node for " + s.toString())
129+
// Since modules can have ambigous names, entry nodes can too, if we do not collate them.
130+
exists(Scope s | s.getEntryNode() = this |
131+
result = "Entry node for " + concat( | | s.toString(), ",")
132+
)
130133
or
131134
exists(Scope s | s.getANormalExit() = this | result = "Exit node for " + s.toString())
132135
or

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 23 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -281,28 +281,33 @@ class DataFlowExpr = Expr;
281281
/**
282282
* A module to compute local flow.
283283
*
284-
* Flow will generally go from control flow nodes into essa variables at definitions,
284+
* Flow will generally go from control flow nodes for expressions into
285+
* control flow nodes for variables at definitions,
285286
* and from there via use-use flow to other control flow nodes.
286287
*
287288
* Some syntaxtic constructs are handled separately.
288289
*/
289290
module LocalFlow {
290-
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
291+
/** Holds if `nodeFrom` is the expression defining the value for the variable `nodeTo`. */
291292
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
292293
// Definition
293294
// `x = f(42)`
294-
// nodeFrom is `f(42)`, cfg node
295-
// nodeTo is `x`, essa var
296-
nodeFrom.(CfgNode).getNode() =
297-
nodeTo.(EssaNode).getVar().getDefinition().(AssignmentDefinition).getValue()
295+
// nodeFrom is `f(42)`
296+
// nodeTo is `x`
297+
exists(AssignmentDefinition def |
298+
nodeFrom.(CfgNode).getNode() = def.getValue() and
299+
nodeTo.(CfgNode).getNode() = def.getDefiningNode()
300+
)
298301
or
299302
// With definition
300303
// `with f(42) as x:`
301-
// nodeFrom is `f(42)`, cfg node
302-
// nodeTo is `x`, essa var
303-
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
304+
// nodeFrom is `f(42)`
305+
// nodeTo is `x`
306+
exists(With with, ControlFlowNode contextManager, WithDefinition withDef, ControlFlowNode var |
307+
var = withDef.getDefiningNode()
308+
|
304309
nodeFrom.(CfgNode).getNode() = contextManager and
305-
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
310+
nodeTo.(CfgNode).getNode() = var and
306311
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
307312
with.getContextExpr() = contextManager.getNode() and
308313
with.getOptionalVars() = var.getNode() and
@@ -313,34 +318,6 @@ module LocalFlow {
313318
// * `foo = x.foo(); await foo.async_method(); foo.close()` and
314319
// * `async with x.foo() as foo: await foo.async_method()`.
315320
)
316-
or
317-
// Async with var definition
318-
// `async with f(42) as x:`
319-
// nodeFrom is `x`, cfg node
320-
// nodeTo is `x`, essa var
321-
//
322-
// This makes the cfg node the local source of the awaited value.
323-
//
324-
// We have this step in addition to the step above, to handle cases where the QL
325-
// modeling of `f(42)` requires a `.getAwaited()` step (in API graphs) when not
326-
// using `async with`, so you can do both:
327-
// * `foo = await x.foo(); await foo.async_method(); foo.close()` and
328-
// * `async with x.foo() as foo: await foo.async_method()`.
329-
exists(With with, ControlFlowNode var |
330-
nodeFrom.(CfgNode).getNode() = var and
331-
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
332-
with.getOptionalVars() = var.getNode() and
333-
with.isAsync()
334-
)
335-
or
336-
// Parameter definition
337-
// `def foo(x):`
338-
// nodeFrom is `x`, cfgNode
339-
// nodeTo is `x`, essa var
340-
exists(ParameterDefinition pd |
341-
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
342-
nodeTo.(EssaNode).getVar() = pd.getVariable()
343-
)
344321
}
345322

346323
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
@@ -372,9 +349,12 @@ module LocalFlow {
372349
// First use after definition
373350
// `y = 42`
374351
// `x = f(y)`
375-
// nodeFrom is `y` on first line, essa var
376-
// nodeTo is `y` on second line, cfg node
377-
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
352+
// nodeFrom is `y` on first line
353+
// nodeTo is `y` on second line
354+
exists(EssaDefinition def |
355+
nodeFrom.(CfgNode).getNode() = def.(EssaNodeDefinition).getDefiningNode() and
356+
AdjacentUses::firstUse(def, nodeTo.(CfgNode).getNode())
357+
)
378358
or
379359
// Next use after use
380360
// `x = f(y)`
@@ -565,11 +545,7 @@ predicate neverSkipInPathGraph(Node n) {
565545
// ```
566546
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
567547
// annoying and doesn't help the path explanation become clearer.
568-
n.asVar() instanceof EssaDefinition and
569-
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
570-
// with use-use flow, and since the CFN is already part of the path graph, we don't
571-
// want to force showing the SSA node as well.
572-
not n.asVar() instanceof ParameterDefinition
548+
n.asCfgNode() = any(EssaNodeDefinition def).getDefiningNode()
573549
}
574550

575551
/**
@@ -916,7 +892,7 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
916892
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
917893
exists(ForTarget target |
918894
nodeFrom.asExpr() = target.getSource() and
919-
nodeTo.asVar().(EssaNodeDefinition).getDefiningNode() = target
895+
nodeTo.asCfgNode() = target
920896
) and
921897
(
922898
c instanceof ListElementContent

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ private import semmle.python.frameworks.data.ModelsAsData
2424
* The current implementation of these cross flows can be seen in `EssaTaintTracking`.
2525
*/
2626
newtype TNode =
27-
/** A node corresponding to an SSA variable. */
28-
TEssaNode(EssaVariable var) or
2927
/** A node corresponding to a control flow node. */
3028
TCfgNode(ControlFlowNode node) {
3129
isExpressionNode(node)
3230
or
3331
node.getNode() instanceof Pattern
32+
or
33+
node = any(ScopeEntryDefinition def).getDefiningNode()
3434
} or
3535
/**
3636
* A synthetic node representing the value of an object before a state change.
@@ -156,9 +156,6 @@ class Node extends TNode {
156156
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
157157
}
158158

159-
/** Gets the ESSA variable corresponding to this node, if any. */
160-
EssaVariable asVar() { none() }
161-
162159
/** Gets the control-flow node corresponding to this node, if any. */
163160
ControlFlowNode asCfgNode() { none() }
164161

@@ -171,25 +168,6 @@ class Node extends TNode {
171168
LocalSourceNode getALocalSource() { result.flowsTo(this) }
172169
}
173170

174-
/** A data-flow node corresponding to an SSA variable. */
175-
class EssaNode extends Node, TEssaNode {
176-
EssaVariable var;
177-
178-
EssaNode() { this = TEssaNode(var) }
179-
180-
/** Gets the `EssaVariable` represented by this data-flow node. */
181-
EssaVariable getVar() { result = var }
182-
183-
override EssaVariable asVar() { result = var }
184-
185-
/** Gets a textual representation of this element. */
186-
override string toString() { result = var.toString() }
187-
188-
override Scope getScope() { result = var.getScope() }
189-
190-
override Location getLocation() { result = var.getLocation() }
191-
}
192-
193171
/** A data-flow node corresponding to a control-flow node. */
194172
class CfgNode extends Node, TCfgNode {
195173
ControlFlowNode node;
@@ -412,8 +390,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
412390
}
413391

414392
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
415-
EssaNode getAWrite() {
416-
result.getVar().getDefinition().(EssaNodeDefinition).definedBy(var, any(DefinitionNode defn))
393+
Node getAWrite() {
394+
any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode))
417395
}
418396

419397
/** Gets the possible values of the variable at the end of import time */

python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ module ImportResolution {
112112
not allowedEssaImportStep(_, firstDef)
113113
|
114114
not LocalFlow::defToFirstUse(firstDef, _) and
115-
val.asVar() = firstDef
115+
val.asCfgNode() = firstDef.getDefinition().(EssaNodeDefinition).getDefiningNode()
116116
or
117117
exists(ControlFlowNode mid, ControlFlowNode end |
118118
LocalFlow::defToFirstUse(firstDef, mid) and
@@ -320,11 +320,11 @@ module ImportResolution {
320320
// name as a submodule, we always consider that this attribute _could_ be a
321321
// reference to the submodule, even if we don't know that the submodule has been
322322
// imported yet.
323-
exists(string submodule, Module package |
324-
submodule = result.asVar().getName() and
325-
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
326-
package.getEntryNode()) and
327-
m = getModuleFromName(package.getPackageName() + "." + submodule)
323+
exists(string submodule, Module package, EssaVariable var |
324+
submodule = var.getName() and
325+
SsaSource::init_module_submodule_defn(var.getSourceVariable(), package.getEntryNode()) and
326+
m = getModuleFromName(package.getPackageName() + "." + submodule) and
327+
result.asCfgNode() = var.getDefinition().(EssaNodeDefinition).getDefiningNode()
328328
)
329329
}
330330

python/ql/lib/semmle/python/dataflow/new/internal/IterableUnpacking.qll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,13 @@
8787
* This is adequate as the route through `TIterableElement(sequence)` does not transfer precise content.
8888
*
8989
* 5. [Read] Content is read from `sequence` to its elements.
90-
* a) If the element is a plain variable, the target is the corresponding essa node.
90+
* a) If the element is a plain variable, the target is the corresponding control flow node.
9191
*
9292
* b) If the element is itself a sequence, with control-flow node `seq`, the target is `TIterableSequence(seq)`.
9393
*
9494
* c) If the element is a starred variable, with control-flow node `v`, the target is `TIterableElement(v)`.
9595
*
96-
* 6. [Store] Content is stored from `TIterableElement(v)` to the essa variable for `v`, with
96+
* 6. [Store] Content is stored from `TIterableElement(v)` to the control flow node for variable `v`, with
9797
* content type `ListElementContent`.
9898
*
9999
* 7. [Flow, Read, Store] Steps 2 through 7 are repeated for all recursive elements which are sequences.
@@ -313,7 +313,7 @@ predicate iterableUnpackingConvertingStoreStep(Node nodeFrom, Content c, Node no
313313
* Step 5
314314
* For a sequence node inside an iterable unpacking, data flows from the sequence to its elements. There are
315315
* three cases for what `toNode` should be:
316-
* a) If the element is a plain variable, `toNode` is the corresponding essa node.
316+
* a) If the element is a plain variable, `toNode` is the corresponding control flow node.
317317
*
318318
* b) If the element is itself a sequence, with control-flow node `seq`, `toNode` is `TIterableSequence(seq)`.
319319
*
@@ -351,20 +351,25 @@ predicate iterableUnpackingElementReadStep(Node nodeFrom, Content c, Node nodeTo
351351
nodeTo = TIterableElementNode(element)
352352
else
353353
// Step 5a
354-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = element
354+
exists(MultiAssignmentDefinition mad | element = mad.getDefiningNode() |
355+
nodeTo.(CfgNode).getNode() = element
356+
)
355357
)
356358
)
357359
}
358360

359361
/**
360362
* Step 6
361-
* Data flows from `TIterableElement(v)` to the essa variable for `v`, with
363+
* Data flows from `TIterableElement(v)` to the control flow node for variable `v`, with
362364
* content type `ListElementContent`.
363365
*/
364366
predicate iterableUnpackingStarredElementStoreStep(Node nodeFrom, Content c, Node nodeTo) {
365-
exists(ControlFlowNode starred | starred.getNode() instanceof Starred |
367+
exists(ControlFlowNode starred, MultiAssignmentDefinition mad |
368+
starred.getNode() instanceof Starred and
369+
starred = mad.getDefiningNode()
370+
|
366371
nodeFrom = TIterableElementNode(starred) and
367-
nodeTo.asVar().getDefinition().(MultiAssignmentDefinition).getDefiningNode() = starred and
372+
nodeTo.asCfgNode() = starred and
368373
c instanceof ListElementContent
369374
)
370375
}

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class LocalSourceNode extends Node {
7171
or
7272
// We include all scope entry definitions, as these act as the local source within the scope they
7373
// enter.
74-
this.asVar() instanceof ScopeEntryDefinition
74+
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
7575
}
7676

7777
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
@@ -165,7 +165,7 @@ class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
165165
LocalSourceNodeNotModuleVariableNode() {
166166
this instanceof ExprNode
167167
or
168-
this.asVar() instanceof ScopeEntryDefinition
168+
this.asCfgNode() = any(ScopeEntryDefinition def).getDefiningNode()
169169
}
170170
}
171171

python/ql/lib/semmle/python/dataflow/new/internal/MatchUnpacking.qll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
8989
or
9090
// the interior pattern flows to the alias
9191
nodeFrom.(CfgNode).getNode().getNode() = subject.getPattern() and
92-
nodeTo.(EssaNode).getVar().getDefinition().(PatternAliasDefinition).getDefiningNode().getNode() =
93-
alias
92+
exists(PatternAliasDefinition pad | pad.getDefiningNode().getNode() = alias |
93+
nodeTo.(CfgNode).getNode() = pad.getDefiningNode()
94+
)
9495
)
9596
}
9697

@@ -123,13 +124,9 @@ predicate matchLiteralFlowStep(Node nodeFrom, Node nodeTo) {
123124
predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
124125
exists(MatchCapturePattern capture, Name var | capture.getVariable() = var |
125126
nodeFrom.(CfgNode).getNode().getNode() = capture and
126-
nodeTo
127-
.(EssaNode)
128-
.getVar()
129-
.getDefinition()
130-
.(PatternCaptureDefinition)
131-
.getDefiningNode()
132-
.getNode() = var
127+
exists(PatternCaptureDefinition pcd | pcd.getDefiningNode().getNode() = var |
128+
nodeTo.(CfgNode).getNode() = pcd.getDefiningNode()
129+
)
133130
)
134131
}
135132

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,10 @@ predicate awaitStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
216216
*/
217217
predicate asyncWithStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
218218
exists(With with, ControlFlowNode contextManager, ControlFlowNode var |
219+
var = any(WithDefinition wd).getDefiningNode()
220+
|
219221
nodeFrom.(DataFlow::CfgNode).getNode() = contextManager and
220-
nodeTo.(DataFlow::EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
222+
nodeTo.(DataFlow::CfgNode).getNode() = var and
221223
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
222224
with.getContextExpr() = contextManager.getNode() and
223225
with.getOptionalVars() = var.getNode() and

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,20 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
5050
}
5151

5252
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
53-
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
54-
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
53+
// Jump into a capturing scope.
54+
//
55+
// var = expr
56+
// ...
57+
// def f():
58+
// ..var is used..
59+
//
60+
// nodeFrom is `expr`
61+
// nodeTo is entry node for `f`
62+
exists(ScopeEntryDefinition e, SsaSourceVariable var, DefinitionNode def |
63+
e.getSourceVariable() = var and
64+
var.hasDefiningNode(def)
65+
|
66+
nodeTo.asCfgNode() = e.getDefiningNode() and
5567
nodeFrom.asCfgNode() = def.getValue() and
5668
var.getScope().getScope*() = nodeFrom.getScope()
5769
)
@@ -228,8 +240,7 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input {
228240
|
229241
param = FlowSummary::SummaryComponent::parameter(apos) and
230242
DataFlowDispatch::parameterMatch(ppos, apos) and
231-
// pick the SsaNode rather than the CfgNode
232-
result.asVar().getDefinition().(ParameterDefinition).getParameter() = p and
243+
result.asCfgNode().getNode() = p and
233244
(
234245
exists(int i | ppos.isPositional(i) |
235246
p = callable.getALocalSource().asExpr().(CallableExpr).getInnerScope().getArg(i)

0 commit comments

Comments
 (0)