Skip to content

Commit 07a70af

Browse files
authored
Python: Limit set of globals that may be built-ins
I am very tempted to leave out the constants, or at the very least `False`, `True`, and `None`, as these have _many_ occurrences in the average codebase, and are not terribly useful at the API-graph level. If we really do want to capture "nodes that refer to such and such constant", then I think a better solution would be to create classes extending `DataFlow::Node` to facilitate this.
1 parent d288b92 commit 07a70af

File tree

1 file changed

+62
-8
lines changed

1 file changed

+62
-8
lines changed

python/ql/src/semmle/python/ApiGraphs.qll

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -351,20 +351,74 @@ module API {
351351

352352
private import semmle.python.types.Builtins as Builtins
353353

354+
/**
 * Gets the name of a known built-in function or constant.
 *
 * The result is drawn from fixed lists of names. Names listed as specific to one major
 * version are only produced when `major_version()` has the matching value.
 *
 * Note that no built-in exception types (for example `ValueError`) appear in these lists.
 */
355+
private string builtin_name() {
356+
// Built-in functions shared between Python 2.7.6 and 3.9.5
357+
result in [
358+
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
359+
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
360+
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
361+
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
362+
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
363+
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
364+
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__"
365+
]
366+
or
367+
// Built-in constants shared between Python 2.7.6 and 3.9.5
368+
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
369+
or
370+
// Python 3.9.5 only
// (`and` binds tighter than `or`, so the version check guards only the list directly below it)
371+
major_version() = 3 and
372+
result in ["ascii", "breakpoint", "bytes", "exec"]
373+
or
374+
// Python 2.7.6 only
// (as above, the version check applies only to the list that follows)
375+
major_version() = 2 and
376+
result in [
377+
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
378+
"unichr", "unicode", "xrange"
379+
]
380+
}
381+
354382
/**
355383
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
356384
*
357-
* Currently this is an over-approximation, and does not account for things like overwriting a
385+
* Currently this is an over-approximation, and may not account for things like overwriting a
358386
* built-in with a different value.
*
* A candidate access is only discarded when the module that contains it also stores to a
* global variable called `name`. Stores in other modules are not taken into account, and the
* position of the store relative to the access is not checked.
359387
*/
360388
private DataFlow::Node likely_builtin(string name) {
361-
result.asCfgNode() =
362-
any(NameNode n |
363-
n.isGlobal() and
364-
n.isLoad() and
365-
name = n.getId() and
366-
name in [any(Builtins::Builtin b).getName(), "None", "True", "False"]
367-
)
389+
exists(Module m |
390+
result.asCfgNode() =
391+
any(NameNode n |
392+
possible_builtin_accessed_in_module(n, name, m) and
393+
// Discard accesses in modules that themselves assign to a global called `name`.
not possible_builtin_defined_in_module(name, m)
394+
)
395+
)
396+
}
397+
398+
/**
399+
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
400+
* a value in the module `m`.
*
* Any global store of `name` whose enclosing module is `m` counts; where the store occurs
* relative to any access of `name` is not considered.
401+
*/
402+
private predicate possible_builtin_defined_in_module(string name, Module m) {
403+
exists(NameNode n |
404+
n.isGlobal() and
405+
n.isStore() and
406+
name = n.getId() and
407+
name = builtin_name() and
408+
m = n.getEnclosingModule()
409+
)
410+
}
411+
412+
/**
413+
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
414+
* built-in) inside the module `m`.
*
* Only loads of a global name are matched (stores are not), and `m` is the module that
* encloses `n`.
415+
*/
416+
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
417+
n.isGlobal() and
418+
n.isLoad() and
419+
name = n.getId() and
420+
name = builtin_name() and
421+
m = n.getEnclosingModule()
368422
}
369423

370424
/**

0 commit comments

Comments
 (0)