Skip to content

Commit 85f00fd

Browse files
authored
Merge pull request #6776 from yoff/python/model-asyncpg
Python: Model `asyncpg`
2 parents 3a1836c + 0f2f68b commit 85f00fd

File tree

24 files changed

+533
-31
lines changed

24 files changed

+533
-31
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ Python built-in support
171171
multidict, Utility library
172172
yarl, Utility library
173173
aioch, Database
174+
asyncpg, Database
174175
clickhouse-driver, Database
175176
mysql-connector-python, Database
176177
mysql-connector, Database
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
lgtm,codescanning
2+
* Added modeling of `asyncpg` for sinks executing SQL and/or accessing the file system.
3+
* Corrected the API graph, such that all awaited values now are referred to via `getAwaited`.

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ module API {
304304
* API graph node for the prefix `foo`), in accordance with the usual semantics of Python.
305305
*/
306306

307+
private import semmle.python.internal.Awaited
308+
307309
cached
308310
newtype TApiNode =
309311
/** The root of the API graph. */
@@ -518,10 +520,9 @@ module API {
518520
)
519521
or
520522
// awaiting
521-
exists(Await await, DataFlow::Node awaitedValue |
523+
exists(DataFlow::Node awaitedValue |
522524
lbl = Label::await() and
523-
ref.asExpr() = await and
524-
await.getValue() = awaitedValue.asExpr() and
525+
ref = awaited(awaitedValue) and
525526
pred.flowsTo(awaitedValue)
526527
)
527528
)

python/ql/lib/semmle/python/Concepts.qll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,47 @@ module CodeExecution {
326326
}
327327
}
328328

329+
/**
330+
* A data-flow node that constructs an SQL statement.
331+
* Often, it is worthy of an alert if an SQL statement is constructed such that
332+
* executing it would be a security risk.
333+
*
334+
* If it is important that the SQL statement is indeed executed, then use `SQLExecution`.
335+
*
336+
* Extend this class to refine existing API models. If you want to model new APIs,
337+
* extend `SqlConstruction::Range` instead.
338+
*/
339+
class SqlConstruction extends DataFlow::Node {
340+
SqlConstruction::Range range;
341+
342+
SqlConstruction() { this = range }
343+
344+
/** Gets the argument that specifies the SQL statements to be constructed. */
345+
DataFlow::Node getSql() { result = range.getSql() }
346+
}
347+
348+
/** Provides a class for modeling new SQL execution APIs. */
349+
module SqlConstruction {
350+
/**
351+
* A data-flow node that constructs an SQL statement.
352+
* Often, it is worthy of an alert if an SQL statement is constructed such that
353+
* executing it would be a security risk.
354+
*
355+
* Extend this class to model new APIs. If you want to refine existing API models,
356+
* extend `SqlExecution` instead.
357+
*/
358+
abstract class Range extends DataFlow::Node {
359+
/** Gets the argument that specifies the SQL statements to be constructed. */
360+
abstract DataFlow::Node getSql();
361+
}
362+
}
363+
329364
/**
330365
* A data-flow node that executes SQL statements.
331366
*
367+
* If the context of interest is such that merely constructing an SQL statement
368+
* would be valuabe to report, then consider using `SqlConstruction`.
369+
*
332370
* Extend this class to refine existing API models. If you want to model new APIs,
333371
* extend `SqlExecution::Range` instead.
334372
*/
@@ -346,6 +384,9 @@ module SqlExecution {
346384
/**
347385
* A data-flow node that executes SQL statements.
348386
*
387+
* If the context of interest is such that merely constructing an SQL statement
388+
* would be valuabe to report, then consider using `SqlConstruction`.
389+
*
349390
* Extend this class to model new APIs. If you want to refine existing API models,
350391
* extend `SqlExecution` instead.
351392
*/

python/ql/lib/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// `docs/codeql/support/reusables/frameworks.rst`
77
private import semmle.python.frameworks.Aioch
88
private import semmle.python.frameworks.Aiohttp
9+
private import semmle.python.frameworks.Asyncpg
910
private import semmle.python.frameworks.ClickhouseDriver
1011
private import semmle.python.frameworks.Cryptodome
1112
private import semmle.python.frameworks.Cryptography

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,23 @@ module EssaFlow {
178178
// see `with_flow` in `python/ql/src/semmle/python/dataflow/Implementation.qll`
179179
with.getContextExpr() = contextManager.getNode() and
180180
with.getOptionalVars() = var.getNode() and
181+
not with.isAsync() and
181182
contextManager.strictlyDominates(var)
182183
)
183184
or
185+
// Async with var definition
186+
// `async with f(42) as x:`
187+
// nodeFrom is `x`, cfg node
188+
// nodeTo is `x`, essa var
189+
//
190+
// This makes the cfg node the local source of the awaited value.
191+
exists(With with, ControlFlowNode var |
192+
nodeFrom.(CfgNode).getNode() = var and
193+
nodeTo.(EssaNode).getVar().getDefinition().(WithDefinition).getDefiningNode() = var and
194+
with.getOptionalVars() = var.getNode() and
195+
with.isAsync()
196+
)
197+
or
184198
// Parameter definition
185199
// `def foo(x):`
186200
// nodeFrom is `x`, cfgNode
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `asyncpg` PyPI package.
3+
* See https://magicstack.github.io/asyncpg/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.Concepts
9+
private import semmle.python.ApiGraphs
10+
11+
/** Provides models for the `asyncpg` PyPI package. */
12+
private module Asyncpg {
13+
private import semmle.python.internal.Awaited
14+
15+
/** A `ConectionPool` is created when the result of `asyncpg.create_pool()` is awaited. */
16+
API::Node connectionPool() {
17+
result = API::moduleImport("asyncpg").getMember("create_pool").getReturn().getAwaited()
18+
}
19+
20+
/**
21+
* A `Connection` is created when
22+
* - the result of `asyncpg.connect()` is awaited.
23+
* - the result of calling `aquire` on a `ConnectionPool` is awaited.
24+
*/
25+
API::Node connection() {
26+
result = API::moduleImport("asyncpg").getMember("connect").getReturn().getAwaited()
27+
or
28+
result = connectionPool().getMember("acquire").getReturn().getAwaited()
29+
}
30+
31+
/** `Connection`s and `ConnectionPool`s provide some methods that execute SQL. */
32+
class SqlExecutionOnConnection extends SqlExecution::Range, DataFlow::MethodCallNode {
33+
string methodName;
34+
35+
SqlExecutionOnConnection() {
36+
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval", "executemany"] and
37+
this.calls([connectionPool().getAUse(), connection().getAUse()], methodName)
38+
}
39+
40+
override DataFlow::Node getSql() {
41+
methodName in ["copy_from_query", "execute", "fetch", "fetchrow", "fetchval"] and
42+
result in [this.getArg(0), this.getArgByName("query")]
43+
or
44+
methodName = "executemany" and
45+
result in [this.getArg(0), this.getArgByName("command")]
46+
}
47+
}
48+
49+
/** `Connection`s and `ConnectionPool`s provide some methods that access the file system. */
50+
class FileAccessOnConnection extends FileSystemAccess::Range, DataFlow::MethodCallNode {
51+
string methodName;
52+
53+
FileAccessOnConnection() {
54+
methodName in ["copy_from_query", "copy_from_table", "copy_to_table"] and
55+
this.calls([connectionPool().getAUse(), connection().getAUse()], methodName)
56+
}
57+
58+
// The path argument is keyword only.
59+
override DataFlow::Node getAPathArgument() {
60+
methodName in ["copy_from_query", "copy_from_table"] and
61+
result = this.getArgByName("output")
62+
or
63+
methodName = "copy_to_table" and
64+
result = this.getArgByName("source")
65+
}
66+
}
67+
68+
/**
69+
* Provides models of the `PreparedStatement` class in `asyncpg`.
70+
* `PreparedStatement`s are created when the result of calling `prepare(query)` on a connection is awaited.
71+
* The result of calling `prepare(query)` is a `PreparedStatementFactory` and the argument, `query` needs to
72+
* be tracked to the place where a `PreparedStatement` is created and then futher to any executing methods.
73+
* Hence the two type trackers.
74+
*
75+
* TODO: Rewrite this, once we have `API::CallNode` available.
76+
*/
77+
module PreparedStatement {
78+
class PreparedStatementConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
79+
PreparedStatementConstruction() { this = connection().getMember("prepare").getACall() }
80+
81+
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
82+
}
83+
84+
private DataFlow::TypeTrackingNode preparedStatementFactory(
85+
DataFlow::TypeTracker t, DataFlow::Node sql
86+
) {
87+
t.start() and
88+
sql = result.(PreparedStatementConstruction).getSql()
89+
or
90+
exists(DataFlow::TypeTracker t2 | result = preparedStatementFactory(t2, sql).track(t2, t))
91+
}
92+
93+
DataFlow::Node preparedStatementFactory(DataFlow::Node sql) {
94+
preparedStatementFactory(DataFlow::TypeTracker::end(), sql).flowsTo(result)
95+
}
96+
97+
private DataFlow::TypeTrackingNode preparedStatement(DataFlow::TypeTracker t, DataFlow::Node sql) {
98+
t.start() and
99+
result = awaited(preparedStatementFactory(sql))
100+
or
101+
exists(DataFlow::TypeTracker t2 | result = preparedStatement(t2, sql).track(t2, t))
102+
}
103+
104+
DataFlow::Node preparedStatement(DataFlow::Node sql) {
105+
preparedStatement(DataFlow::TypeTracker::end(), sql).flowsTo(result)
106+
}
107+
108+
class PreparedStatementExecution extends SqlExecution::Range, DataFlow::MethodCallNode {
109+
DataFlow::Node sql;
110+
111+
PreparedStatementExecution() {
112+
this.calls(preparedStatement(sql), ["executemany", "fetch", "fetchrow", "fetchval"])
113+
}
114+
115+
override DataFlow::Node getSql() { result = sql }
116+
}
117+
}
118+
119+
/**
120+
* Provides models of the `Cursor` class in `asyncpg`.
121+
* `Cursor`s are created
122+
* - when the result of calling `cursor(query)` on a connection is awaited.
123+
* - when the result of calling `cursor()` on a prepared statement is awaited.
124+
* The result of calling `cursor` in either case is a `CursorFactory` and the argument, `query` needs to
125+
* be tracked to the place where a `Cursor` is created, hence the type tracker.
126+
* The creation of the `Cursor` executes the query.
127+
*
128+
* TODO: Rewrite this, once we have `API::CallNode` available.
129+
*/
130+
module Cursor {
131+
class CursorConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
132+
CursorConstruction() { this = connection().getMember("cursor").getACall() }
133+
134+
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("query")] }
135+
}
136+
137+
private DataFlow::TypeTrackingNode cursorFactory(DataFlow::TypeTracker t, DataFlow::Node sql) {
138+
// cursor created from connection
139+
t.start() and
140+
sql = result.(CursorConstruction).getSql()
141+
or
142+
// cursor created from prepared statement
143+
t.start() and
144+
result.(DataFlow::MethodCallNode).calls(PreparedStatement::preparedStatement(sql), "cursor")
145+
or
146+
exists(DataFlow::TypeTracker t2 | result = cursorFactory(t2, sql).track(t2, t))
147+
}
148+
149+
DataFlow::Node cursorFactory(DataFlow::Node sql) {
150+
cursorFactory(DataFlow::TypeTracker::end(), sql).flowsTo(result)
151+
}
152+
153+
/** The creation of a `Cursor` executes the associated query. */
154+
class CursorCreation extends SqlExecution::Range {
155+
DataFlow::Node sql;
156+
157+
CursorCreation() { this = awaited(cursorFactory(sql)) }
158+
159+
override DataFlow::Node getSql() { result = sql }
160+
}
161+
}
162+
}

python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,9 @@ module SqlAlchemy {
313313
* A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
314314
* textual SQL string directly.
315315
*/
316-
abstract class TextClauseConstruction extends DataFlow::CallCfgNode {
316+
abstract class TextClauseConstruction extends SqlConstruction::Range, DataFlow::CallCfgNode {
317317
/** Gets the argument that specifies the SQL text. */
318-
DataFlow::Node getTextArg() { result in [this.getArg(0), this.getArgByName("text")] }
318+
override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("text")] }
319319
}
320320

321321
/** `TextClause` constructions from the `sqlalchemy` package. */
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* INTERNAL: Do not use.
3+
*
4+
* Provides helper class for defining additional API graph edges.
5+
*/
6+
7+
private import python
8+
private import semmle.python.dataflow.new.DataFlow
9+
10+
/**
11+
* INTERNAL: Do not use.
12+
*
13+
* Holds if `result` is the result of awaiting `awaitedValue`.
14+
*/
15+
cached
16+
DataFlow::Node awaited(DataFlow::Node awaitedValue) {
17+
// `await` x
18+
// - `awaitedValue` is `x`
19+
// - `result` is `await x`
20+
exists(Await await |
21+
await.getValue() = awaitedValue.asExpr() and
22+
result.asExpr() = await
23+
)
24+
or
25+
// `async for x in l`
26+
// - `awaitedValue` is `l`
27+
// - `result` is `l` (`x` is behind a read step)
28+
exists(AsyncFor asyncFor |
29+
// To consider `x` the result of awaiting, we would use asyncFor.getTarget() = awaitedValue.asExpr(),
30+
// but that is behind a read step rather than a flow step.
31+
asyncFor.getIter() = awaitedValue.asExpr() and
32+
result.asExpr() = asyncFor.getIter()
33+
)
34+
or
35+
// `async with x as y`
36+
// - `awaitedValue` is `x`
37+
// - `result` is `x` and `y` if it exists
38+
exists(AsyncWith asyncWith |
39+
awaitedValue.asExpr() = asyncWith.getContextExpr() and
40+
result.asExpr() in [
41+
// `x`
42+
asyncWith.getContextExpr(),
43+
// `y`, if it exists
44+
asyncWith.getOptionalVars()
45+
]
46+
)
47+
}

python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,17 @@ module SqlInjection {
4343
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
4444

4545
/**
46-
* A SQL statement of a SQL execution, considered as a flow sink.
46+
* A SQL statement of a SQL construction, considered as a flow sink.
4747
*/
48-
class SqlExecutionAsSink extends Sink {
49-
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
48+
class SqlConstructionAsSink extends Sink {
49+
SqlConstructionAsSink() { this = any(SqlConstruction c).getSql() }
5050
}
5151

5252
/**
53-
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
53+
* A SQL statement of a SQL execution, considered as a flow sink.
5454
*/
55-
class TextArgAsSink extends Sink {
56-
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
55+
class SqlExecutionAsSink extends Sink {
56+
SqlExecutionAsSink() { this = any(SqlExecution e).getSql() }
5757
}
5858

5959
/**

0 commit comments

Comments
 (0)