Skip to content

Commit 1d9f8c2

Browse files
authored
Merge pull request github#5427 from RasmusWL/use-new-builtin-modeling
Approved by yoff
2 parents 7c20c4a + 27032af commit 1d9f8c2

File tree

3 files changed

+19
-133
lines changed

3 files changed

+19
-133
lines changed

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 17 additions & 131 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.DataFlow
88
private import semmle.python.dataflow.new.TaintTracking
99
private import semmle.python.dataflow.new.RemoteFlowSources
1010
private import semmle.python.Concepts
11+
private import semmle.python.ApiGraphs
1112
private import PEP249
1213

1314
/** Provides models for the Python standard library. */
@@ -684,96 +685,35 @@ private module Stdlib {
684685
// ---------------------------------------------------------------------------
685686
// builtins
686687
// ---------------------------------------------------------------------------
687-
/** Gets a reference to the `builtins` module (called `__builtin__` in Python 2). */
688-
private DataFlow::Node builtins(DataFlow::TypeTracker t) {
689-
t.start() and
690-
result = DataFlow::importNode(["builtins", "__builtin__"])
691-
or
692-
exists(DataFlow::TypeTracker t2 | result = builtins(t2).track(t2, t))
693-
}
694-
695-
/** Gets a reference to the `builtins` module. */
696-
DataFlow::Node builtins() { result = builtins(DataFlow::TypeTracker::end()) }
697-
698-
/**
699-
* Gets a reference to the attribute `attr_name` of the `builtins` module.
700-
* WARNING: Only holds for a few predefined attributes.
701-
*/
702-
private DataFlow::Node builtins_attr(DataFlow::TypeTracker t, string attr_name) {
703-
attr_name in ["exec", "eval", "compile", "open"] and
704-
(
705-
t.start() and
706-
result = DataFlow::importNode(["builtins", "__builtin__"] + "." + attr_name)
707-
or
708-
t.startInAttr(attr_name) and
709-
result = DataFlow::importNode(["builtins", "__builtin__"])
710-
or
711-
// special handling of builtins, that are in scope without any imports
712-
// TODO: Take care of overrides, either `def eval: ...`, `eval = ...`, or `builtins.eval = ...`
713-
t.start() and
714-
exists(NameNode ref | result.asCfgNode() = ref |
715-
ref.isGlobal() and
716-
ref.getId() = attr_name and
717-
ref.isLoad()
718-
)
719-
)
720-
or
721-
// Due to bad performance when using normal setup with `builtins_attr(t2, attr_name).track(t2, t)`
722-
// we have inlined that code and forced a join
723-
exists(DataFlow::TypeTracker t2 |
724-
exists(DataFlow::StepSummary summary |
725-
builtins_attr_first_join(t2, attr_name, result, summary) and
726-
t = t2.append(summary)
727-
)
728-
)
729-
}
730-
731-
pragma[nomagic]
732-
private predicate builtins_attr_first_join(
733-
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
734-
) {
735-
DataFlow::StepSummary::step(builtins_attr(t2, attr_name), res, summary)
736-
}
737-
738-
/**
739-
* Gets a reference to the attribute `attr_name` of the `builtins` module.
740-
* WARNING: Only holds for a few predefined attributes.
741-
*/
742-
private DataFlow::Node builtins_attr(string attr_name) {
743-
result = builtins_attr(DataFlow::TypeTracker::end(), attr_name)
744-
}
745-
746688
/**
747689
* A call to the builtin `exec` function.
748690
* See https://docs.python.org/3/library/functions.html#exec
749691
*/
750-
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CfgNode {
751-
override CallNode node;
692+
private class BuiltinsExecCall extends CodeExecution::Range, DataFlow::CallCfgNode {
693+
BuiltinsExecCall() { this = API::builtin("exec").getACall() }
752694

753-
BuiltinsExecCall() { node.getFunction() = builtins_attr("exec").asCfgNode() }
754-
755-
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
695+
override DataFlow::Node getCode() { result = this.getArg(0) }
756696
}
757697

758698
/**
759699
* A call to the builtin `eval` function.
760700
* See https://docs.python.org/3/library/functions.html#eval
761701
*/
762-
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CfgNode {
702+
private class BuiltinsEvalCall extends CodeExecution::Range, DataFlow::CallCfgNode {
763703
override CallNode node;
764704

765-
BuiltinsEvalCall() { node.getFunction() = builtins_attr("eval").asCfgNode() }
705+
BuiltinsEvalCall() { this = API::builtin("eval").getACall() }
766706

767-
override DataFlow::Node getCode() { result.asCfgNode() = node.getArg(0) }
707+
override DataFlow::Node getCode() { result = this.getArg(0) }
768708
}
769709

770710
/** An additional taint step for calls to the builtin function `compile` */
771711
private class BuiltinsCompileCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
772712
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
773-
exists(CallNode call |
774-
nodeTo.asCfgNode() = call and
775-
call.getFunction() = builtins_attr("compile").asCfgNode() and
776-
nodeFrom.asCfgNode() in [call.getArg(0), call.getArgByName("source")]
713+
exists(DataFlow::CallCfgNode call |
714+
nodeTo = call and
715+
call = API::builtin("compile").getACall() and
716+
nodeFrom in [call.getArg(0), call.getArgByName("source")]
777717
)
778718
}
779719
}
@@ -782,23 +722,22 @@ private module Stdlib {
782722
* A call to the builtin `open` function.
783723
* See https://docs.python.org/3/library/functions.html#open
784724
*/
785-
private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
786-
override CallNode node;
787-
725+
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
788726
OpenCall() {
789-
node.getFunction() = builtins_attr("open").asCfgNode()
727+
this = API::builtin("open").getACall()
790728
or
791-
node.getFunction() = io_attr("open").asCfgNode()
729+
// io.open is a special case, since it is an alias for the builtin `open`
730+
this = API::moduleImport("io").getMember("open").getACall()
792731
}
793732

794733
override DataFlow::Node getAPathArgument() {
795-
result.asCfgNode() in [node.getArg(0), node.getArgByName("file")]
734+
result in [this.getArg(0), this.getArgByName("file")]
796735
}
797736
}
798737

799738
/**
800739
* An exec statement (only Python 2).
801-
* Se ehttps://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
740+
* See https://docs.python.org/2/reference/simple_stmts.html#the-exec-statement.
802741
*/
803742
private class ExecStatement extends CodeExecution::Range {
804743
ExecStatement() {
@@ -942,59 +881,6 @@ private module Stdlib {
942881
}
943882
}
944883

945-
// ---------------------------------------------------------------------------
946-
// io
947-
// ---------------------------------------------------------------------------
948-
/** Gets a reference to the `io` module. */
949-
private DataFlow::Node io(DataFlow::TypeTracker t) {
950-
t.start() and
951-
result = DataFlow::importNode("io")
952-
or
953-
exists(DataFlow::TypeTracker t2 | result = io(t2).track(t2, t))
954-
}
955-
956-
/** Gets a reference to the `io` module. */
957-
DataFlow::Node io() { result = io(DataFlow::TypeTracker::end()) }
958-
959-
/**
960-
* Gets a reference to the attribute `attr_name` of the `io` module.
961-
* WARNING: Only holds for a few predefined attributes.
962-
*/
963-
private DataFlow::Node io_attr(DataFlow::TypeTracker t, string attr_name) {
964-
attr_name in ["open"] and
965-
(
966-
t.start() and
967-
result = DataFlow::importNode("io" + "." + attr_name)
968-
or
969-
t.startInAttr(attr_name) and
970-
result = io()
971-
)
972-
or
973-
// Due to bad performance when using normal setup with `io_attr(t2, attr_name).track(t2, t)`
974-
// we have inlined that code and forced a join
975-
exists(DataFlow::TypeTracker t2 |
976-
exists(DataFlow::StepSummary summary |
977-
io_attr_first_join(t2, attr_name, result, summary) and
978-
t = t2.append(summary)
979-
)
980-
)
981-
}
982-
983-
pragma[nomagic]
984-
private predicate io_attr_first_join(
985-
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
986-
) {
987-
DataFlow::StepSummary::step(io_attr(t2, attr_name), res, summary)
988-
}
989-
990-
/**
991-
* Gets a reference to the attribute `attr_name` of the `io` module.
992-
* WARNING: Only holds for a few predefined attributes.
993-
*/
994-
private DataFlow::Node io_attr(string attr_name) {
995-
result = io_attr(DataFlow::TypeTracker::end(), attr_name)
996-
}
997-
998884
// ---------------------------------------------------------------------------
999885
// json
1000886
// ---------------------------------------------------------------------------

python/ql/test/experimental/library-tests/frameworks/stdlib/CodeExecutionPossibleFP1.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,4 +8,4 @@ def eval(*args, **kwargs):
88

99

1010
# This function call might be marked as a code execution, but it actually isn't.
11-
eval("print(42)") # $ SPURIOUS: getCode="print(42)"
11+
eval("print(42)")

python/ql/test/experimental/library-tests/frameworks/stdlib/CodeExecutionPossibleFP2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,4 @@ def foo(*args, **kwargs):
1010
eval = foo
1111

1212
# This function call might be marked as a code execution, but it actually isn't.
13-
eval("print(42)") # $ SPURIOUS: getCode="print(42)"
13+
eval("print(42)")

0 commit comments

Comments
 (0)