Skip to content

Commit 7a6646d

Browse files
authored
Merge pull request github#8883 from erik-krogh/pyMaD
Python: add MaD implementation
2 parents a0ae8b3 + e557d88 commit 7a6646d

File tree

21 files changed

+1500
-119
lines changed

21 files changed

+1500
-119
lines changed

config/identical-files.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,8 @@
525525
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
526526
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
527527
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
528-
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
528+
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll",
529+
"python/ql/lib/semmle/python/frameworks/data/internal/AccessPathSyntax.qll"
529530
],
530531
"IncompleteUrlSubstringSanitization": [
531532
"javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll",
@@ -543,7 +544,8 @@
543544
],
544545
"ApiGraphModels": [
545546
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll",
546-
"ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll"
547+
"ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll",
548+
"python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll"
547549
],
548550
"TaintedFormatStringQuery Ruby/JS": [
549551
"javascript/ql/lib/semmle/javascript/security/dataflow/TaintedFormatStringQuery.qll",

javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ private class AccessPathRange extends AccessPath::Range {
299299
bindingset[token]
300300
API::Node getSuccessorFromNode(API::Node node, AccessPathToken token) {
301301
// API graphs use the same label for arguments and parameters. An edge originating from a
302-
// use-node represents be an argument, and an edge originating from a def-node represents a parameter.
302+
// use-node represents an argument, and an edge originating from a def-node represents a parameter.
303303
// We just map both to the same thing.
304304
token.getName() = ["Argument", "Parameter"] and
305305
result = node.getParameter(AccessPath::parseIntUnbounded(token.getAnArgument()))

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ module API {
136136
result = this.getASuccessor(Label::keywordParameter(name))
137137
}
138138

139+
/** Gets the API node representing the `self` parameter. */
140+
Node getSelfParameter() { result = this.getASuccessor(Label::selfParameter()) }
141+
139142
/**
140143
* Gets the number of parameters of the function represented by this node.
141144
*/
@@ -321,6 +324,12 @@ module API {
321324
/** Gets the API node for a parameter of this invocation. */
322325
Node getAParameter() { result = this.getParameter(_) }
323326

327+
/** Gets the API node for the object that this call is invoked on, if this is a method call. */
328+
Node getSelfParameter() {
329+
result.getARhs() = this.(DataFlow::MethodCallNode).getObject() and
330+
result = callee.getSelfParameter()
331+
}
332+
324333
/** Gets the API node for the keyword parameter `name` of this invocation. */
325334
Node getKeywordParameter(string name) {
326335
result = callee.getKeywordParameter(name) and
@@ -345,6 +354,14 @@ module API {
345354
result = callee.getReturn() and
346355
result.getAnImmediateUse() = this
347356
}
357+
358+
/**
359+
* Gets the number of positional arguments of this call.
360+
*
361+
* Note: This is used for `WithArity[<n>]` in modeling-as-data, where counting
362+
* keyword arguments did not make much sense, so only positional arguments are counted.
363+
*/
364+
int getNumArgument() { result = count(this.getArg(_)) }
348365
}
349366

350367
/**
@@ -589,15 +606,24 @@ module API {
589606
exists(DataFlow::Node def, PY::CallableExpr fn |
590607
rhs(base, def) and fn = trackDefNode(def).asExpr()
591608
|
592-
exists(int i |
593-
lbl = Label::parameter(i) and
609+
exists(int i, int offset |
610+
if exists(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
611+
then offset = 1
612+
else offset = 0
613+
|
614+
lbl = Label::parameter(i - offset) and
594615
ref.asExpr() = fn.getInnerScope().getArg(i)
595616
)
596617
or
597-
exists(string name |
618+
exists(string name, PY::Parameter param |
598619
lbl = Label::keywordParameter(name) and
599-
ref.asExpr() = fn.getInnerScope().getArgByName(name)
620+
param = fn.getInnerScope().getArgByName(name) and
621+
not param.isSelf() and
622+
ref.asExpr() = param
600623
)
624+
or
625+
lbl = Label::selfParameter() and
626+
ref.asExpr() = any(PY::Parameter p | p = fn.getInnerScope().getAnArg() and p.isSelf())
601627
)
602628
or
603629
// Built-ins, treated as members of the module `builtins`
@@ -664,6 +690,9 @@ module API {
664690
exists(string name | lbl = Label::keywordParameter(name) |
665691
arg = pred.getACall().getArgByName(name)
666692
)
693+
or
694+
lbl = Label::selfParameter() and
695+
arg = pred.getACall().(DataFlow::MethodCallNode).getObject()
667696
)
668697
}
669698

@@ -780,6 +809,7 @@ module API {
780809
or
781810
exists(any(PY::Function f).getArgByName(name))
782811
} or
812+
MkLabelSelfParameter() or
783813
MkLabelReturn() or
784814
MkLabelSubclass() or
785815
MkLabelAwait()
@@ -837,6 +867,11 @@ module API {
837867
string getName() { result = name }
838868
}
839869

870+
/** A label for the self parameter. */
871+
class LabelSelfParameter extends ApiLabel, MkLabelSelfParameter {
872+
override string toString() { result = "getSelfParameter()" }
873+
}
874+
840875
/** A label that gets the return value of a function. */
841876
class LabelReturn extends ApiLabel, MkLabelReturn {
842877
override string toString() { result = "getReturn()" }
@@ -876,6 +911,9 @@ module API {
876911
/** Gets the `parameter` edge label for the keyword parameter `name`. */
877912
LabelKeywordParameter keywordParameter(string name) { result.getName() = name }
878913

914+
/** Gets the edge label for the self parameter. */
915+
LabelSelfParameter selfParameter() { any() }
916+
879917
/** Gets the `return` edge label. */
880918
LabelReturn return() { any() }
881919

python/ql/lib/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ private import semmle.python.frameworks.Asyncpg
1212
private import semmle.python.frameworks.ClickhouseDriver
1313
private import semmle.python.frameworks.Cryptodome
1414
private import semmle.python.frameworks.Cryptography
15+
private import semmle.python.frameworks.data.ModelsAsData
1516
private import semmle.python.frameworks.Dill
1617
private import semmle.python.frameworks.Django
1718
private import semmle.python.frameworks.Fabric

python/ql/lib/semmle/python/frameworks/Asyncpg.qll

Lines changed: 38 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -7,91 +7,42 @@ private import python
77
private import semmle.python.dataflow.new.DataFlow
88
private import semmle.python.Concepts
99
private import semmle.python.ApiGraphs
10+
private import semmle.python.frameworks.data.ModelsAsData
1011

1112
/** Provides models for the `asyncpg` PyPI package. */
1213
private module Asyncpg {
13-
private import semmle.python.internal.Awaited
14-
15-
/** Gets a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited. */
16-
API::Node connectionPool() {
17-
result = API::moduleImport("asyncpg").getMember("create_pool").getReturn().getAwaited()
18-
}
19-
20-
/**
21-
* Gets a `Connection` that is created when
22-
* - the result of `asyncpg.connect()` is awaited.
23-
* - the result of calling `acquire` on a `ConnectionPool` is awaited.
24-
*/
25-
API::Node connection() {
26-
result = API::moduleImport("asyncpg").getMember("connect").getReturn().getAwaited()
27-
or
28-
result = connectionPool().getMember("acquire").getReturn().getAwaited()
29-
}
30-
31-
/** `Connection`s and `ConnectionPool`s provide some methods that execute SQL. */
32-
class SqlExecutionOnConnection extends SqlExecution::Range, DataFlow::MethodCallNode {
33-
string methodName;
34-
35-
SqlExecutionOnConnection() {
36-
this = [connectionPool(), connection()].getMember(methodName).getACall() and
37-
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"]
38-
}
39-
40-
override DataFlow::Node getSql() {
41-
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval"] and
42-
result in [this.getArg(0), this.getArgByName("query")]
43-
or
44-
methodName = "executemany" and
45-
result in [this.getArg(0), this.getArgByName("command")]
46-
}
47-
}
48-
49-
/** A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system. */
50-
class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode {
51-
string methodName;
52-
53-
FileAccessOnConnection() {
54-
this = [connectionPool(), connection()].getMember(methodName).getACall() and
55-
methodName in ["copy_from_query", "copy_from_table", "copy_to_table"]
56-
}
57-
58-
// The path argument is keyword only.
59-
override DataFlow::Node getAPathArgument() {
60-
methodName in ["copy_from_query", "copy_from_table"] and
61-
result = this.getArgByName("output")
62-
or
63-
methodName = "copy_to_table" and
64-
result = this.getArgByName("source")
14+
class AsyncpgModel extends ModelInput::TypeModelCsv {
15+
override predicate row(string row) {
16+
// package1;type1;package2;type2;path
17+
row =
18+
[
19+
// a `ConnectionPool` that is created when the result of `asyncpg.create_pool()` is awaited.
20+
"asyncpg;ConnectionPool;asyncpg;;Member[create_pool].ReturnValue.Awaited",
21+
// a `Connection` that is created when
22+
// - the result of `asyncpg.connect()` is awaited.
23+
// - the result of calling `acquire` on a `ConnectionPool` is awaited.
24+
"asyncpg;Connection;asyncpg;;Member[connect].ReturnValue.Awaited",
25+
"asyncpg;Connection;asyncpg;ConnectionPool;Member[acquire].ReturnValue.Awaited",
26+
// Creating an internal `~Connection` type that contains both `Connection` and `ConnectionPool`.
27+
"asyncpg;~Connection;asyncpg;Connection;", "asyncpg;~Connection;asyncpg;ConnectionPool;"
28+
]
6529
}
6630
}
6731

68-
/**
69-
* Provides models of the `PreparedStatement` class in `asyncpg`.
70-
* `PreparedStatement`s are created when the result of calling `prepare(query)` on a connection is awaited.
71-
* The result of calling `prepare(query)` is a `PreparedStatementFactory` and the argument, `query` needs to
72-
* be tracked to the place where a `PreparedStatement` is created and then further to any executing methods.
73-
* Hence the two type trackers.
74-
*/
75-
module PreparedStatement {
76-
class PreparedStatementConstruction extends SqlConstruction::Range, API::CallNode {
77-
PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() }
78-
79-
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
80-
}
81-
82-
class PreparedStatementExecution extends SqlExecution::Range, API::CallNode {
83-
PreparedStatementConstruction prepareCall;
84-
85-
PreparedStatementExecution() {
86-
this =
87-
prepareCall
88-
.getReturn()
89-
.getAwaited()
90-
.getMember(["executemany", "fetch", "fetchrow", "fetchval"])
91-
.getACall()
92-
}
93-
94-
override DataFlow::Node getSql() { result = prepareCall.getSql() }
32+
class AsyncpgSink extends ModelInput::SinkModelCsv {
33+
// package;type;path;kind
34+
override predicate row(string row) {
35+
row =
36+
[
37+
// `Connection`s and `ConnectionPool`s provide some methods that execute SQL.
38+
"asyncpg;~Connection;Member[copy_from_query,execute,fetch,fetchrow,fetchval].Argument[0,query:];sql-injection",
39+
"asyncpg;~Connection;Member[executemany].Argument[0,command:];sql-injection",
40+
// A model of `Connection` and `ConnectionPool`, which provide some methods that access the file system.
41+
"asyncpg;~Connection;Member[copy_from_query,copy_from_table].Argument[output:];path-injection",
42+
"asyncpg;~Connection;Member[copy_to_table].Argument[source:];path-injection",
43+
// the `PreparedStatement` class in `asyncpg`.
44+
"asyncpg;Connection;Member[prepare].Argument[0,query:];sql-injection",
45+
]
9546
}
9647
}
9748

@@ -106,7 +57,9 @@ private module Asyncpg {
10657
*/
10758
module Cursor {
10859
class CursorConstruction extends SqlConstruction::Range, API::CallNode {
109-
CursorConstruction() { this = connection().getMember("cursor").getACall() }
60+
CursorConstruction() {
61+
this = ModelOutput::getATypeNode("asyncpg", "Connection").getMember("cursor").getACall()
62+
}
11063

11164
override DataFlow::Node getSql() { result = this.getParameter(0, "query").getARhs() }
11265
}
@@ -121,8 +74,11 @@ private module Asyncpg {
12174
this = c.getReturn().getAwaited().getAnImmediateUse()
12275
)
12376
or
124-
exists(PreparedStatement::PreparedStatementConstruction prepareCall |
125-
sql = prepareCall.getSql() and
77+
exists(API::CallNode prepareCall |
78+
prepareCall =
79+
ModelOutput::getATypeNode("asyncpg", "Connection").getMember("prepare").getACall()
80+
|
81+
sql = prepareCall.getParameter(0, "query").getARhs() and
12682
this =
12783
prepareCall
12884
.getReturn()
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* Provides classes for contributing a model, or using the interpreted results
3+
* of a model represented as data.
4+
*
5+
* - Use the `ModelInput` module to contribute new models.
6+
* - Use the `ModelOutput` module to access the model results in terms of API nodes.
7+
*
8+
* The package name refers to the top-level module the import comes from, and not a PyPI package.
9+
* So for `from foo.bar import baz`, the package will be `foo`.
10+
*/
11+
12+
private import python
13+
private import internal.ApiGraphModels as Shared
14+
private import internal.ApiGraphModelsSpecific as Specific
15+
import Shared::ModelInput as ModelInput
16+
import Shared::ModelOutput as ModelOutput
17+
private import semmle.python.dataflow.new.RemoteFlowSources
18+
private import semmle.python.dataflow.new.DataFlow
19+
private import semmle.python.ApiGraphs
20+
private import semmle.python.dataflow.new.TaintTracking
21+
22+
/**
23+
* A remote flow source originating from a CSV source row.
24+
*/
25+
private class RemoteFlowSourceFromCsv extends RemoteFlowSource {
26+
RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").getAnImmediateUse() }
27+
28+
override string getSourceType() { result = "Remote flow (from model)" }
29+
}
30+
31+
/**
32+
* Like `Specific::summaryStep`, but with API nodes mapped to data-flow nodes.
33+
*/
34+
private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) {
35+
exists(API::Node predNode, API::Node succNode |
36+
Specific::summaryStep(predNode, succNode, kind) and
37+
pred = predNode.getARhs() and
38+
succ = succNode.getAnImmediateUse()
39+
)
40+
}
41+
42+
/** Taint steps induced by summary models of kind `taint`. */
43+
private class TaintStepFromSummary extends TaintTracking::AdditionalTaintStep {
44+
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
45+
summaryStepNodes(pred, succ, "taint")
46+
}
47+
}

0 commit comments

Comments
 (0)