Skip to content

Commit d8857c7

Browse files
authored
Merge pull request #7246 from tausbn/python/import-star-flow
Python: Support flow through `import *`
2 parents a46787e + b871342 commit d8857c7

File tree

18 files changed

+268
-122
lines changed

18 files changed

+268
-122
lines changed

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 10 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -358,134 +358,26 @@ module API {
358358
)
359359
}
360360

361-
/** Gets the name of a known built-in. */
362-
private string getBuiltInName() {
363-
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
364-
// Python 3 and 2 respectively, using the `dir` built-in.
365-
// Built-in functions and exceptions shared between Python 2 and 3
366-
result in [
367-
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
368-
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
369-
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
370-
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
371-
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
372-
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
373-
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
374-
// Exceptions
375-
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
376-
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
377-
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
378-
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
379-
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
380-
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
381-
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
382-
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
383-
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
384-
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
385-
// Added for compatibility
386-
"exec"
387-
]
388-
or
389-
// Built-in constants shared between Python 2 and 3
390-
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
391-
or
392-
// Python 3 only
393-
result in [
394-
"ascii", "breakpoint", "bytes", "exec", "aiter", "anext",
395-
// Exceptions
396-
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
397-
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
398-
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
399-
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
400-
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
401-
]
402-
or
403-
// Python 2 only
404-
result in [
405-
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
406-
"unichr", "unicode", "xrange"
407-
]
408-
}
409-
410-
/**
411-
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
412-
*
413-
* Currently this is an over-approximation, and may not account for things like overwriting a
414-
* built-in with a different value.
415-
*/
416-
private DataFlow::Node likely_builtin(string name) {
417-
exists(Module m |
418-
result.asCfgNode() =
419-
any(NameNode n |
420-
possible_builtin_accessed_in_module(n, name, m) and
421-
not possible_builtin_defined_in_module(name, m)
422-
)
423-
)
424-
}
425-
426-
/**
427-
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
428-
* a value in the module `m`.
429-
*/
430-
private predicate possible_builtin_defined_in_module(string name, Module m) {
431-
global_name_defined_in_module(name, m) and
432-
name = getBuiltInName()
433-
}
434-
435-
/**
436-
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
437-
* built-in) inside the module `m`.
438-
*/
439-
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
440-
n.isGlobal() and
441-
n.isLoad() and
442-
name = n.getId() and
443-
name = getBuiltInName() and
444-
m = n.getEnclosingModule()
445-
}
446-
447-
/**
448-
* Holds if `n` is an access of a variable called `name` (which is _not_ the name of a
449-
* built-in, and which is _not_ a global defined in the enclosing module) inside the scope `s`.
450-
*/
451-
private predicate name_possibly_defined_in_import_star(NameNode n, string name, Scope s) {
452-
n.isLoad() and
453-
name = n.getId() and
454-
// Not already defined in an enclosing scope.
455-
not exists(LocalVariable v |
456-
v.getId() = name and v.getScope() = n.getScope().getEnclosingScope*()
457-
) and
458-
not name = getBuiltInName() and
459-
s = n.getScope().getEnclosingScope*() and
460-
exists(potential_import_star_base(s)) and
461-
not global_name_defined_in_module(name, n.getEnclosingModule())
462-
}
463-
464-
/** Holds if a global variable called `name` is assigned a value in the module `m`. */
465-
private predicate global_name_defined_in_module(string name, Module m) {
466-
exists(NameNode n |
467-
not exists(LocalVariable v | n.defines(v)) and
468-
n.isStore() and
469-
name = n.getId() and
470-
m = n.getEnclosingModule()
471-
)
472-
}
361+
private import semmle.python.dataflow.new.internal.Builtins
362+
private import semmle.python.dataflow.new.internal.ImportStar
473363

474364
/**
475365
* Gets the API graph node for all modules imported with `from ... import *` inside the scope `s`.
476366
*
477367
* For example, given
478368
*
479-
* `from foo.bar import *`
369+
* ```python
370+
* from foo.bar import *
371+
* ```
480372
*
481373
* this would be the API graph node with the path
482374
*
483375
* `moduleImport("foo").getMember("bar")`
484376
*/
485377
private TApiNode potential_import_star_base(Scope s) {
486-
exists(DataFlow::Node ref |
487-
ref.asCfgNode() = any(ImportStarNode n | n.getScope() = s).getModule() and
488-
use(result, ref)
378+
exists(DataFlow::Node n |
379+
n.asCfgNode() = ImportStar::potentialImportStarBase(s) and
380+
use(result, n)
489381
)
490382
}
491383

@@ -529,14 +421,14 @@ module API {
529421
or
530422
// Built-ins, treated as members of the module `builtins`
531423
base = MkModuleImport("builtins") and
532-
lbl = Label::member(any(string name | ref = likely_builtin(name)))
424+
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
533425
or
534426
// Unknown variables that may belong to a module imported with `import *`
535427
exists(Scope s |
536428
base = potential_import_star_base(s) and
537429
lbl =
538430
Label::member(any(string name |
539-
name_possibly_defined_in_import_star(ref.asCfgNode(), name, s)
431+
ImportStar::namePossiblyDefinedInImportStar(ref.asCfgNode(), name, s)
540432
))
541433
)
542434
}
python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/** Provides predicates for reasoning about built-ins in Python. */
2+
3+
private import python
4+
private import semmle.python.dataflow.new.DataFlow
5+
private import semmle.python.dataflow.new.internal.ImportStar
6+
7+
module Builtins {
8+
/** Gets the name of a known built-in. */
9+
string getBuiltinName() {
10+
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
11+
// Python 3 and 2 respectively, using the `dir` built-in.
12+
// Built-in functions and exceptions shared between Python 2 and 3
13+
result in [
14+
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
15+
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
16+
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
17+
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
18+
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
19+
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
20+
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
21+
// Exceptions
22+
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
23+
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
24+
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
25+
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
26+
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
27+
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
28+
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
29+
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
30+
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
31+
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
32+
// Added for compatibility
33+
"exec"
34+
]
35+
or
36+
// Built-in constants shared between Python 2 and 3
37+
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
38+
or
39+
// Python 3 only (`aiter` and `anext` were added in Python 3.10)
40+
result in [
41+
"ascii", "breakpoint", "bytes", "exec", "aiter", "anext",
42+
// Exceptions
43+
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
44+
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
45+
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
46+
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
47+
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
48+
]
49+
or
50+
// Python 2 only
51+
result in [
52+
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr",
53+
"unicode", "xrange"
54+
]
55+
}
56+
57+
/**
58+
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
59+
*
60+
* Currently this is an over-approximation, and may not account for things like overwriting a
61+
* built-in with a different value.
62+
*/
63+
DataFlow::Node likelyBuiltin(string name) {
64+
exists(Module m |
65+
result.asCfgNode() =
66+
any(NameNode n |
67+
possible_builtin_accessed_in_module(n, name, m) and
68+
not possible_builtin_defined_in_module(name, m)
69+
)
70+
)
71+
}
72+
73+
/**
74+
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
75+
* a value in the module `m`.
76+
*/
77+
private predicate possible_builtin_defined_in_module(string name, Module m) {
78+
ImportStar::globalNameDefinedInModule(name, m) and
79+
name = getBuiltinName()
80+
}
81+
82+
/**
83+
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
84+
* built-in) inside the module `m`.
85+
*/
86+
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
87+
n.isGlobal() and
88+
n.isLoad() and
89+
name = n.getId() and
90+
name = getBuiltinName() and
91+
m = n.getEnclosingModule()
92+
}
93+
}

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ private import python
22
private import DataFlowPublic
33
import semmle.python.SpecialMethods
44
private import semmle.python.essa.SsaCompute
5+
private import semmle.python.dataflow.new.internal.ImportStar
56

67
/** Gets the callable in which this node occurs. */
78
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -948,7 +949,7 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
948949
private predicate module_export(Module m, string name, CfgNode defn) {
949950
exists(EssaVariable v |
950951
v.getName() = name and
951-
v.getAUse() = m.getANormalExit()
952+
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit()
952953
|
953954
defn.getNode() = v.getDefinition().(AssignmentDefinition).getValue()
954955
or

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import semmle.python.dataflow.new.TypeTracker
88
import Attributes
99
import LocalSources
1010
private import semmle.python.essa.SsaCompute
11+
private import semmle.python.dataflow.new.internal.ImportStar
1112

1213
/**
1314
* IPA type for data flow nodes.
@@ -30,7 +31,15 @@ newtype TNode =
3031
/** A synthetic node representing the value of an object after a state change. */
3132
TSyntheticPostUpdateNode(NeedsSyntheticPostUpdateNode pre) or
3233
/** A node representing a global (module-level) variable in a specific module. */
33-
TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m and v.escapes() } or
34+
TModuleVariableNode(Module m, GlobalVariable v) {
35+
v.getScope() = m and
36+
(
37+
v.escapes()
38+
or
39+
isAccessedThroughImportStar(m) and
40+
ImportStar::globalNameDefinedInModule(v.getId(), m)
41+
)
42+
} or
3443
/**
3544
* A node representing the overflow positional arguments to a call.
3645
* That is, `call` contains more positional arguments than there are
@@ -346,6 +355,8 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
346355
result.asCfgNode() = var.getALoad().getAFlowNode() and
347356
// Ignore reads that happen when the module is imported. These are only executed once.
348357
not result.getScope() = mod
358+
or
359+
this = import_star_read(result)
349360
}
350361

351362
/** Gets an `EssaNode` that corresponds to an assignment of this global variable. */
@@ -358,6 +369,13 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
358369
override Location getLocation() { result = mod.getLocation() }
359370
}
360371

372+
/**
 * Holds if `m` is a result of `ImportStar::getStarImported` for some argument.
 *
 * NOTE(review): presumably this means some scope contains `from m import *`;
 * confirm against the definition of `getStarImported` in `ImportStar.qll`.
 */
private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) }
373+
374+
/**
 * Gets the module variable node that the read `n` refers to via an `import *`.
 *
 * The result is a `ModuleVariableNode` whose module is one that
 * `ImportStar::importStarResolvesTo` relates to the control-flow node of `n`,
 * and whose variable has the same identifier as the `NameNode` underlying `n`.
 */
private ModuleVariableNode import_star_read(Node n) {
375+
ImportStar::importStarResolvesTo(n.asCfgNode(), result.getModule()) and
376+
n.asCfgNode().(NameNode).getId() = result.getVariable().getId()
377+
}
378+
361379
/**
362380
* The node holding the extra positional arguments to a call. This node is passed as a tuple
363381
* to the starred parameter of the callable.

0 commit comments

Comments
 (0)