Skip to content

Commit 8517eff

Browse files
authored
Python: Fix bad performance
A few changes, all bundled together: - We were getting a lot of magic applied to the predicates in the `ImportStar` module, and this was causing needless re-evaluation. To address this, the easiest solution was to simply cache the entire module. - In order to separate this from the dataflow analysis and make it dependent only on control flow, `potentialImportStarBase` was changed to return a `ControlFlowNode`. - `isDefinedLocally` was defined on control flow nodes, which meant we were duplicating a lot of tuples due to control flow splitting, to no actual benefit. Finally, there was a really bad join in `isDefinedLocally` that was fixed by separating out a helper predicate. This is a case where we could use a three-way join, since the join between the `Scope`, the `name` string and the `Name` is big no matter what. If we join `scope_defines_name` with `n.getId()`, we'll get `Name`s belonging to irrelevant scopes. If we join `scope_defines_name` with the enclosing scope of the `Name` `n`, then we'll get this also for `Name`s that don't share their `getId` with the local variable defined in the scope. If we join `n.getId()` with `n.getScope()...` then we'll get all enclosing scopes for each `Name`. The last of these is what we currently have. It's not terrible, but not great either. (Though thankfully it's rare to have lots of enclosing scopes.)
1 parent 7cd9369 commit 8517eff

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,10 @@ module API {
375375
* `moduleImport("foo").getMember("bar")`
376376
*/
377377
private TApiNode potential_import_star_base(Scope s) {
378-
use(result, ImportStar::potentialImportStarBase(s))
378+
exists(DataFlow::Node n |
379+
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
380+
use(result, n)
381+
)
379382
}
380383

381384
/**

python/ql/lib/semmle/python/dataflow/new/internal/ImportStar.qll

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
/** Provides predicates for reasoning about uses of `import *` in Python. */
22

33
private import python
4-
private import semmle.python.dataflow.new.DataFlow
54
private import semmle.python.dataflow.new.internal.Builtins
65

6+
cached
77
module ImportStar {
88
/**
99
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
1010
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
1111
*/
12+
cached
1213
predicate namePossiblyDefinedInImportStar(NameNode n, string name, Scope s) {
1314
n.isLoad() and
1415
name = n.getId() and
1516
s = n.getScope().getEnclosingScope*() and
1617
exists(potentialImportStarBase(s)) and
1718
// Not already defined in an enclosing scope.
18-
not isDefinedLocally(n)
19+
not isDefinedLocally(n.getNode())
1920
}
2021

2122
/** Holds if `n` refers to a variable that is defined in the module in which it occurs. */
22-
private predicate isDefinedLocally(NameNode n) {
23+
cached
24+
private predicate isDefinedLocally(Name n) {
2325
// Defined in an enclosing scope
24-
exists(LocalVariable v |
25-
v.getId() = n.getId() and v.getScope() = n.getScope().getEnclosingScope*()
26-
)
26+
scope_defines_name(n.getScope().getEnclosingScope*(), n.getId())
2727
or
2828
// Defined as a built-in
2929
n.getId() = Builtins::getBuiltinName()
@@ -35,7 +35,14 @@ module ImportStar {
3535
n.getId() in ["__name__", "__package__"]
3636
}
3737

38+
/** Holds if the name `name` is defined in the scope `s` */
39+
pragma[nomagic]
40+
private predicate scope_defines_name(Scope s, string name) {
41+
exists(LocalVariable v | v.getId() = name and v.getScope() = s)
42+
}
43+
3844
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
45+
cached
3946
predicate globalNameDefinedInModule(string name, Module m) {
4047
exists(NameNode n |
4148
not exists(LocalVariable v | n.defines(v)) and
@@ -49,15 +56,17 @@ module ImportStar {
4956
* Holds if `n` may refer to a global variable of the same name in the module `m`, accessible
5057
* from the scope of `n` by a chain of `import *` imports.
5158
*/
59+
cached
5260
predicate importStarResolvesTo(NameNode n, Module m) {
5361
m = getStarImported+(n.getEnclosingModule()) and
5462
globalNameDefinedInModule(n.getId(), m) and
55-
not isDefinedLocally(n)
63+
not isDefinedLocally(n.getNode())
5664
}
5765

5866
/**
5967
* Gets a module that is imported from `m` via `import *`.
6068
*/
69+
cached
6170
Module getStarImported(Module m) {
6271
exists(ImportStar i |
6372
i.getScope() = m and result = i.getModule().pointsTo().(ModuleValue).getScope()
@@ -76,7 +85,8 @@ module ImportStar {
7685
*
7786
* this would return the data-flow nodes corresponding to `foo.bar` and `quux`.
7887
*/
79-
DataFlow::Node potentialImportStarBase(Scope s) {
80-
result.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule()
88+
cached
89+
ControlFlowNode potentialImportStarBase(Scope s) {
90+
result = any(ImportStarNode n | n.getScope() = s).getModule()
8191
}
8292
}

0 commit comments

Comments
 (0)