@@ -65,31 +65,75 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate
65
65
*/
66
66
module ImportResolution {
67
67
/**
68
- * Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
69
- * overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
70
- * not include `name`).
68
+ * Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
69
+ * for import resolution.
70
+ */
71
+ private predicate allowedEssaImportStep ( EssaDefinition defFrom , EssaDefinition defTo ) {
72
+ // to handle definitions guarded by if-then-else
73
+ defFrom = defTo .( PhiFunction ) .getAnInput ( )
74
+ or
75
+ // refined variable
76
+ // example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
77
+ defFrom = defTo .( EssaNodeRefinement ) .getInput ( ) .getDefinition ( )
78
+ }
79
+
80
+ /**
81
+ * Holds if the module `m` defines a name `name` with the value `val`. The value
82
+ * represents the value `name` will have at the end of the module (the last place we
83
+ * have def-use flow to).
84
+ *
85
+ * Note: The handling of re-exporting imports is a bit simplistic. We assume that if
86
+ * an import is made, it will be re-exported (which will not be the case if a new
87
+ * value is assigned to the name, or it is deleted).
71
88
*/
72
89
pragma [ nomagic]
73
- predicate module_export ( Module m , string name , DataFlow:: CfgNode defn ) {
74
- exists ( EssaVariable v , EssaDefinition essaDef |
75
- v .getName ( ) = name and
76
- v .getAUse ( ) = ImportStar:: getStarImported * ( m ) .getANormalExit ( ) and
77
- (
78
- essaDef = v .getDefinition ( )
79
- or
80
- // to handle definitions guarded by if-then-else
81
- essaDef = v .getDefinition ( ) .( PhiFunction ) .getAnInput ( )
82
- )
90
+ predicate module_export ( Module m , string name , DataFlow:: Node val ) {
91
+ // Definitions made inside `m` itself
92
+ //
93
+ // for code such as `foo = ...; foo.bar = ...` there will be TWO
94
+ // EssaDefinitions/EssaVariables: one for `foo = ...` (AssignmentDefinition) and one
94
+ // for `foo.bar = ...` (EssaNodeRefinement). The
96
+ // EssaNodeRefinement is the one that will reach the end of the module (normal
97
+ // exit).
98
+ //
99
+ // However, we cannot just use the EssaNodeRefinement as the `val`, because the
100
+ // normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
101
+ // EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
102
+ // reaches the end of the module, to the first definition of the variable, and then
103
+ // track forwards using use-use flow to find a suitable CFG node that has flow into
104
+ // it from use-use flow.
105
+ exists ( EssaVariable lastUseVar , EssaVariable firstDef |
106
+ lastUseVar .getName ( ) = name and
107
+ // we ignore the special variable `$` introduced by our analysis (not used for anything)
108
+ // we ignore the special variable `*` introduced by `from <pkg> import *` -- TODO: understand why this variable exists at all
109
+ not name in [ "$" , "*" ] and
110
+ lastUseVar .getAUse ( ) = m .getANormalExit ( ) and
111
+ allowedEssaImportStep * ( firstDef , lastUseVar ) and
112
+ not allowedEssaImportStep ( _, firstDef )
83
113
|
84
- defn .getNode ( ) = essaDef .( AssignmentDefinition ) .getValue ( )
114
+ not EssaFlow:: defToFirstUse ( firstDef , _) and
115
+ val .asVar ( ) = firstDef
85
116
or
86
- defn .getNode ( ) = essaDef .( ArgumentRefinement ) .getArgument ( )
117
+ exists ( ControlFlowNode mid , ControlFlowNode end |
118
+ EssaFlow:: defToFirstUse ( firstDef , mid ) and
119
+ EssaFlow:: useToNextUse * ( mid , end ) and
120
+ not EssaFlow:: useToNextUse ( end , _) and
121
+ val .asCfgNode ( ) = end
122
+ )
123
+ )
124
+ or
125
+ // re-exports from `from <pkg> import *`
126
+ exists ( Module importedFrom |
127
+ importedFrom = ImportStar:: getStarImported ( m ) and
128
+ module_export ( importedFrom , name , val ) and
129
+ potential_module_export ( importedFrom , name )
87
130
)
88
131
or
132
+ // re-exports from `import <pkg>` or `from <pkg> import <stuff>`
89
133
exists ( Alias a |
90
- defn .asExpr ( ) = [ a .getValue ( ) , a . getValue ( ) . ( ImportMember ) . getModule ( ) ] and
134
+ val .asExpr ( ) = a .getValue ( ) and
91
135
a .getAsname ( ) .( Name ) .getId ( ) = name and
92
- defn .getScope ( ) = m
136
+ val .getScope ( ) = m
93
137
)
94
138
}
95
139
@@ -263,9 +307,21 @@ module ImportResolution {
263
307
module_reexport ( reexporter , attr_name , m )
264
308
)
265
309
or
266
- // Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
267
- // In practice, we create a definition for each module in a package, even if it is not imported.
310
+ // submodules of packages will be available as `<pkg>.<submodule>` after doing
311
+ // `import <pkg>.<submodule>` at least once in the program, or can be directly
312
+ // imported with `from <pkg> import <submodule>` (even with an empty
313
+ // `<pkg>.__init__` file).
314
+ //
315
+ // Until an import of `<pkg>.<submodule>` is executed, it is technically possible
316
+ // that `<pkg>.<submodule>` (or `from <pkg> import <submodule>`) can refer to an
317
+ // attribute set in `<pkg>.__init__`.
318
+ //
319
+ // Therefore, if there is an attribute defined in `<pkg>.__init__` with the same
320
+ // name as a submodule, we always consider that this attribute _could_ be a
321
+ // reference to the submodule, even if we don't know that the submodule has been
322
+ // imported yet.
268
323
exists ( string submodule , Module package |
324
+ submodule = result .asVar ( ) .getName ( ) and
269
325
SsaSource:: init_module_submodule_defn ( result .asVar ( ) .getSourceVariable ( ) ,
270
326
package .getEntryNode ( ) ) and
271
327
m = getModuleFromName ( package .getPackageName ( ) + "." + submodule )
0 commit comments