Skip to content

Commit 0292ca6

Browse files
authored
Merge pull request github#5880 from tausbn/python-limit-builtins
Python: Limit set of globals that may be built-ins
2 parents 2a7ceb2 + c4bb3c2 commit 0292ca6

File tree

3 files changed

+100
-22
lines changed

3 files changed

+100
-22
lines changed

python/ql/src/Expressions/UseofInput.ql

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111
*/
1212

1313
import python
14+
import semmle.python.dataflow.new.DataFlow
15+
import semmle.python.ApiGraphs
1416

15-
from CallNode call, Context context, ControlFlowNode func
17+
from DataFlow::CallCfgNode call
1618
where
17-
context.getAVersion().includes(2, _) and
18-
call.getFunction() = func and
19-
func.pointsTo(context, Value::named("input"), _) and
20-
not func.pointsTo(context, Value::named("raw_input"), _)
19+
major_version() = 2 and
20+
call = API::builtin("input").getACall() and
21+
call != API::builtin("raw_input").getACall()
2122
select call, "The unsafe built-in function 'input' is used in Python 2."

python/ql/src/semmle/python/ApiGraphs.qll

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -349,22 +349,95 @@ module API {
349349
)
350350
}
351351

352-
private import semmle.python.types.Builtins as Builtins
352+
/** Gets the name of a known built-in. */
353+
private string getBuiltInName() {
354+
// These lists were created by inspecting the `builtins` and `__builtin__` modules in
355+
// Python 3 and 2 respectively, using the `dir` built-in.
356+
// Built-in functions and exceptions shared between Python 2 and 3
357+
result in [
358+
"abs", "all", "any", "bin", "bool", "bytearray", "callable", "chr", "classmethod",
359+
"compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "filter",
360+
"float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex",
361+
"id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map",
362+
"max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print",
363+
"property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted",
364+
"staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", "__import__",
365+
// Exceptions
366+
"ArithmeticError", "AssertionError", "AttributeError", "BaseException", "BufferError",
367+
"BytesWarning", "DeprecationWarning", "EOFError", "EnvironmentError", "Exception",
368+
"FloatingPointError", "FutureWarning", "GeneratorExit", "IOError", "ImportError",
369+
"ImportWarning", "IndentationError", "IndexError", "KeyError", "KeyboardInterrupt",
370+
"LookupError", "MemoryError", "NameError", "NotImplemented", "NotImplementedError",
371+
"OSError", "OverflowError", "PendingDeprecationWarning", "ReferenceError", "RuntimeError",
372+
"RuntimeWarning", "StandardError", "StopIteration", "SyntaxError", "SyntaxWarning",
373+
"SystemError", "SystemExit", "TabError", "TypeError", "UnboundLocalError",
374+
"UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError",
375+
"UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError",
376+
// Added for compatibility (NOTE: "exec" is listed again under "Python 3 only" below)
377+
"exec"
378+
]
379+
or
380+
// Built-in constants shared between Python 2 and 3
381+
result in ["False", "True", "None", "NotImplemented", "Ellipsis", "__debug__"]
382+
or
383+
// Python 3 only
384+
result in [
385+
"ascii", "breakpoint", "bytes", "exec",
386+
// Exceptions
387+
"BlockingIOError", "BrokenPipeError", "ChildProcessError", "ConnectionAbortedError",
388+
"ConnectionError", "ConnectionRefusedError", "ConnectionResetError", "FileExistsError",
389+
"FileNotFoundError", "InterruptedError", "IsADirectoryError", "ModuleNotFoundError",
390+
"NotADirectoryError", "PermissionError", "ProcessLookupError", "RecursionError",
391+
"ResourceWarning", "StopAsyncIteration", "TimeoutError"
392+
]
393+
or
394+
// Python 2 only
395+
result in [
396+
"basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload",
397+
"unichr", "unicode", "xrange"
398+
]
399+
}
353400

354401
/**
355402
* Gets a data flow node that is likely to refer to a built-in with the name `name`.
356403
*
357-
* Currently this is an over-approximation, and does not account for things like overwriting a
404+
* Currently this is an over-approximation: a name is only excluded if it is stored to as a
358405
* non-local variable in the same module; built-ins overwritten from elsewhere are not accounted for.
359406
*/
360407
private DataFlow::Node likely_builtin(string name) {
361-
result.asCfgNode() =
362-
any(NameNode n |
363-
n.isGlobal() and
364-
n.isLoad() and
365-
name = n.getId() and
366-
name in [any(Builtins::Builtin b).getName(), "None", "True", "False"]
367-
)
408+
exists(Module m |
409+
result.asCfgNode() =
410+
any(NameNode n |
411+
possible_builtin_accessed_in_module(n, name, m) and
412+
not possible_builtin_defined_in_module(name, m)
413+
)
414+
)
415+
}
416+
417+
/**
418+
* Holds if a global variable called `name` (which is also the name of a built-in) is assigned
419+
* a value in the module `m`.
420+
*/
421+
private predicate possible_builtin_defined_in_module(string name, Module m) {
422+
exists(NameNode n |
423+
not exists(LocalVariable v | n.defines(v)) and
424+
n.isStore() and
425+
name = n.getId() and
426+
name = getBuiltInName() and
427+
m = n.getEnclosingModule()
428+
)
429+
}
430+
431+
/**
432+
* Holds if `n` is an access of a global variable called `name` (which is also the name of a
433+
* built-in) inside the module `m`.
434+
*/
435+
private predicate possible_builtin_accessed_in_module(NameNode n, string name, Module m) {
436+
n.isGlobal() and
437+
n.isLoad() and
438+
name = n.getId() and
439+
name = getBuiltInName() and
440+
m = n.getEnclosingModule()
368441
}
369442

370443
/**

python/ql/test/experimental/dataflow/ApiGraphs/test.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -122,14 +122,18 @@ def my_print(x):
122122
print = my_print
123123
print("these words")
124124

125-
def local_redefine_range():
126-
range = 5
127-
return range
128-
129-
def global_redefine_range():
130-
global range
131-
range = 6
132-
return range #$ SPURIOUS: use=moduleImport("builtins").getMember("range")
125+
def local_redefine_chr():
126+
chr = 5
127+
return chr
128+
129+
def global_redefine_chr():
130+
global chr
131+
chr = 6
132+
return chr
133+
134+
def what_is_chr_now():
135+
# If global_redefine_chr has been run, then the following is _not_ a reference to the built-in chr
136+
return chr(123) #$ MISSING: use=moduleImport("builtins").getMember("chr").getReturn()
133137

134138
def obscured_print():
135139
p = print #$ use=moduleImport("builtins").getMember("print")

0 commit comments

Comments
 (0)