Skip to content

Commit c34d6d1

Browse files
committed
Python: Add query to handle SQLAlchemy TextClause Injection
instead of doing this via taint-steps. See description in code/tests.
1 parent 81dbe36 commit c34d6d1

File tree

15 files changed

+390
-126
lines changed

15 files changed

+390
-126
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Introduced a new query _SQLAlchemy TextClause built from user-controlled sources_ (`py/sqlalchemy-textclause-injection`) to alert if user-input is added to a TextClause from SQLAlchemy, since that can lead to SQL injection.

python/ql/lib/semmle/python/frameworks/SqlAlchemy.qll

Lines changed: 56 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ private import semmle.python.Concepts
1515
private import semmle.python.frameworks.PEP249::PEP249 as PEP249
1616

1717
/**
18+
* INTERNAL: Do not use.
19+
*
1820
* Provides models for the `SQLAlchemy` PyPI package.
1921
* See
2022
* - https://pypi.org/project/SQLAlchemy/
2123
* - https://docs.sqlalchemy.org/en/14/index.html
2224
*/
23-
private module SqlAlchemy {
25+
module SqlAlchemy {
2426
/**
2527
* Provides models for the `sqlalchemy.engine.Engine` and `sqlalchemy.future.Engine` classes.
2628
*
@@ -279,80 +281,62 @@ private module SqlAlchemy {
279281
}
280282

281283
/**
282-
* Additional taint-steps for `sqlalchemy.text()`
284+
* Provides models for the `sqlalchemy.sql.expression.TextClause` class,
285+
* which represents a textual SQL string directly.
283286
*
284-
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
285-
* See https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause
286-
*/
287-
class SqlAlchemyTextAdditionalTaintSteps extends TaintTracking::AdditionalTaintStep {
288-
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
289-
exists(DataFlow::CallCfgNode call |
290-
(
291-
call = API::moduleImport("sqlalchemy").getMember("text").getACall()
292-
or
293-
call = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
294-
or
295-
call =
296-
API::moduleImport("sqlalchemy")
297-
.getMember("sql")
298-
.getMember("expression")
299-
.getMember("text")
300-
.getACall()
301-
or
302-
call =
303-
API::moduleImport("sqlalchemy")
304-
.getMember("sql")
305-
.getMember("expression")
306-
.getMember("TextClause")
307-
.getACall()
308-
) and
309-
nodeFrom in [call.getArg(0), call.getArgByName("text")] and
310-
nodeTo = call
311-
)
312-
}
313-
}
314-
}
315-
316-
private module OldModeling {
317-
/**
318-
* Returns an instantization of a SqlAlchemy Session object.
319-
* See https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.Session and
320-
* https://docs.sqlalchemy.org/en/14/orm/session_api.html#sqlalchemy.orm.sessionmaker
321-
*/
322-
private API::Node getSqlAlchemySessionInstance() {
323-
result = API::moduleImport("sqlalchemy.orm").getMember("Session").getReturn() or
324-
result = API::moduleImport("sqlalchemy.orm").getMember("sessionmaker").getReturn().getReturn()
325-
}
326-
327-
/**
328-
* Returns an instantization of a SqlAlchemy Query object.
329-
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
287+
* ```py
288+
* session.query(For14).filter_by(description=sqlalchemy.text(f"'{user_input}'")).all()
289+
* ```
290+
*
291+
* Initially I wanted to add lots of additional taint steps for such that the normal
292+
* SQL injection query would be able to find cases as the one above where an ORM query
293+
* includes a TextClause that includes user-input directly... But that presented 2
294+
* problems:
295+
*
296+
* - which part of the query construction above should be marked as SQL to fit our
297+
* `SqlExecution` concept. Nothing really fits this well, since all the SQL
298+
* execution happens under the hood.
299+
* - This would require a LOT of modeling for these additional taint steps, since
300+
* there are many many constructs we would need to have models for. (see the 2
301+
* examples below)
302+
*
303+
* So instead we flag user-input to a TextClause with its' own query
304+
* (`py/sqlalchemy-textclause-injection`). And so we don't highlight any parts of an
305+
* ORM constructed query such as these as containing SQL, and don't need the additional
306+
* taint steps either.
307+
*
308+
* See
309+
* - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.TextClause.
310+
* - https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text
330311
*/
331-
private API::Node getSqlAlchemyQueryInstance() {
332-
result = getSqlAlchemySessionInstance().getMember("query").getReturn()
333-
}
312+
module TextClause {
313+
/**
314+
* A construction of a `sqlalchemy.sql.expression.TextClause`, which represents a
315+
* textual SQL string directly.
316+
*/
317+
class TextClauseConstruction extends DataFlow::CallCfgNode {
318+
TextClauseConstruction() {
319+
this = API::moduleImport("sqlalchemy").getMember("text").getACall()
320+
or
321+
this = API::moduleImport("sqlalchemy").getMember("sql").getMember("text").getACall()
322+
or
323+
this =
324+
API::moduleImport("sqlalchemy")
325+
.getMember("sql")
326+
.getMember("expression")
327+
.getMember("text")
328+
.getACall()
329+
or
330+
this =
331+
API::moduleImport("sqlalchemy")
332+
.getMember("sql")
333+
.getMember("expression")
334+
.getMember("TextClause")
335+
.getACall()
336+
}
334337

335-
/**
336-
* A call on a Query object
337-
* See https://docs.sqlalchemy.org/en/14/orm/query.html?highlight=query#sqlalchemy.orm.Query
338-
*/
339-
private class SqlAlchemyQueryCall extends DataFlow::CallCfgNode, SqlExecution::Range {
340-
SqlAlchemyQueryCall() {
341-
this =
342-
getSqlAlchemyQueryInstance()
343-
.getMember(any(SqlAlchemyVulnerableMethodNames methodName))
344-
.getACall()
338+
/** Gets the argument that specifies the SQL text. */
339+
DataFlow::Node getTextArg() { result in [this.getArg(0), this.getArgByName("text")] }
345340
}
346-
347-
override DataFlow::Node getSql() { result = this.getArg(0) }
348-
}
349-
350-
/**
351-
* This class represents a list of methods vulnerable to sql injection.
352-
*
353-
* See https://github.com/jty-team/codeql/pull/2#issue-611592361
354-
*/
355-
private class SqlAlchemyVulnerableMethodNames extends string {
356-
SqlAlchemyVulnerableMethodNames() { this in ["filter", "filter_by", "group_by", "order_by"] }
357341
}
358342
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* Provides a taint-tracking configuration for detecting "SQLAlchemy TextClause injection" vulnerabilities.
3+
*
4+
* Note, for performance reasons: only import this file if
5+
* `SQLAlchemyTextClause::Configuration` is needed, otherwise
6+
* `SQLAlchemyTextClauseCustomizations` should be imported instead.
7+
*/
8+
9+
private import python
10+
import semmle.python.dataflow.new.DataFlow
11+
import semmle.python.dataflow.new.TaintTracking
12+
13+
/**
14+
* Provides a taint-tracking configuration for detecting "SQLAlchemy TextClause injection" vulnerabilities.
15+
*/
16+
module SQLAlchemyTextClause {
17+
import SQLAlchemyTextClauseCustomizations::SQLAlchemyTextClause
18+
19+
/**
20+
* A taint-tracking configuration for detecting "SQLAlchemy TextClause injection" vulnerabilities.
21+
*/
22+
class Configuration extends TaintTracking::Configuration {
23+
Configuration() { this = "SQLAlchemyTextClause" }
24+
25+
override predicate isSource(DataFlow::Node source) { source instanceof Source }
26+
27+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
28+
29+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
30+
31+
override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) {
32+
guard instanceof SanitizerGuard
33+
}
34+
}
35+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for detecting
3+
* "SQLAlchemy TextClause injection"
4+
* vulnerabilities, as well as extension points for adding your own.
5+
*/
6+
7+
private import python
8+
private import semmle.python.dataflow.new.DataFlow
9+
private import semmle.python.Concepts
10+
private import semmle.python.dataflow.new.RemoteFlowSources
11+
private import semmle.python.dataflow.new.BarrierGuards
12+
private import semmle.python.frameworks.SqlAlchemy
13+
14+
/**
15+
* Provides default sources, sinks and sanitizers for detecting
16+
* "SQLAlchemy TextClause injection"
17+
* vulnerabilities, as well as extension points for adding your own.
18+
*/
19+
module SQLAlchemyTextClause {
20+
/**
21+
* A data flow source for "SQLAlchemy TextClause injection" vulnerabilities.
22+
*/
23+
abstract class Source extends DataFlow::Node { }
24+
25+
/**
26+
* A data flow sink for "SQLAlchemy TextClause injection" vulnerabilities.
27+
*/
28+
abstract class Sink extends DataFlow::Node { }
29+
30+
/**
31+
* A sanitizer for "SQLAlchemy TextClause injection" vulnerabilities.
32+
*/
33+
abstract class Sanitizer extends DataFlow::Node { }
34+
35+
/**
36+
* A sanitizer guard for "SQLAlchemy TextClause injection" vulnerabilities.
37+
*/
38+
abstract class SanitizerGuard extends DataFlow::BarrierGuard { }
39+
40+
/**
41+
* A source of remote user input, considered as a flow source.
42+
*/
43+
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
44+
45+
/**
46+
* The text argument of a SQLAlchemy TextClause construction, considered as a flow sink.
47+
*/
48+
class TextArgAsSink extends Sink {
49+
TextArgAsSink() { this = any(SqlAlchemy::TextClause::TextClauseConstruction tcc).getTextArg() }
50+
}
51+
52+
/**
53+
* A comparison with a constant string, considered as a sanitizer-guard.
54+
*/
55+
class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { }
56+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
The <code>TextClause</code> class in the <code>SQLAlchemy</code> PyPI package represents
9+
a textual SQL string directly. If user-input is added to it without sufficient
10+
sanitization, a user may be able to run malicious database queries, since the
11+
<code>TextClause</code> is inserted directly into the final SQL.
12+
</overview>
13+
14+
<recommendation>
15+
<p>
16+
Don't allow user-input to be added to a <code>TextClause</code>, instead construct your
17+
full query with constructs from the ORM, or use query parameters for user-input.
18+
</p>
19+
</recommendation>
20+
21+
<example>
22+
<p>
23+
In the following snippet, a user is fetched from the database using three
24+
different queries.
25+
</p>
26+
27+
<p>
28+
In the first case, the final query string is built by directly including a user-supplied
29+
input. The parameter may include quote characters, so this code is vulnerable to a SQL
30+
injection attack.
31+
</p>
32+
33+
<p>
34+
In the second case, the query is built using ORM models, but part of it is using a
35+
<code>TextClause</code> directly including a user-supplied input. Since the
36+
<code>TextClause</code> is inserted directly into the final SQL, this code is vulnerable
37+
to a SQL injection attack.
38+
</p>
39+
40+
<p>
41+
In the third case, the query is built fully using the ORM models, so in the end, the
42+
user-supplied input will passed passed to the database using query parameters. The
43+
database connector library will take care of escaping and inserting quotes as needed.
44+
</p>
45+
46+
<sample src="examples/sqlalchemy_textclause_injection.py" />
47+
</example>
48+
49+
<references>
50+
<li><a href="https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.text.params.text">Official documentation of the text parameter</a>.</li>
51+
</references>
52+
</qhelp>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* @name SQLAlchemy TextClause built from user-controlled sources
3+
* @description Building a TextClause query from user-controlled sources is vulnerable to insertion of
4+
* malicious SQL code by the user.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @security-severity 8.8
8+
* @precision high
9+
* @id py/sqlalchemy-textclause-injection
10+
* @tags security
11+
* external/cwe/cwe-089
12+
* external/owasp/owasp-a1
13+
*/
14+
15+
import python
16+
import semmle.python.security.dataflow.SQLAlchemyTextClause
17+
import DataFlow::PathGraph
18+
19+
from SQLAlchemyTextClause::Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
20+
where config.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink,
22+
"This SQLAlchemy TextClause depends on $@, which could lead to SQL injection.", source.getNode(),
23+
"a user-provided value"
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from flask import Flask, request
2+
import sqlalchemy
3+
import sqlalchemy.orm
4+
5+
app = Flask(__name__)
6+
engine = sqlalchemy.create_engine(...)
7+
Base = sqlalchemy.orm.declarative_base()
8+
9+
10+
class User(Base):
11+
__tablename__ = "users"
12+
13+
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
14+
username = sqlalchemy.Column(sqlalchemy.String)
15+
16+
17+
@app.route("/users/<username>")
18+
def show_user(username):
19+
session = sqlalchemy.orm.Session(engine)
20+
21+
# BAD, normal SQL injection
22+
stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username))
23+
results = session.execute(stmt).fetchall()
24+
25+
# BAD, allows SQL injection
26+
username_formatted_for_sql = sqlalchemy.text("'{}'".format(username))
27+
stmt = sqlalchemy.select(User).where(User.username == username_formatted_for_sql)
28+
results = session.execute(stmt).scalars().all()
29+
30+
# GOOD, does not allow for SQL injection
31+
stmt = sqlalchemy.select(User).where(User.username == username)
32+
results = session.execute(stmt).scalars().all()
33+
34+
...
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
import python
22
import experimental.meta.ConceptsTest
3-
import experimental.semmle.python.frameworks.SqlAlchemy
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
import experimental.meta.InlineTaintTest
2-
import experimental.semmle.python.frameworks.SqlAlchemy

python/ql/test/library-tests/frameworks/sqlalchemy/SqlExecution.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ class User(Base):
4747

4848
# Injection requiring the text() taint-step
4949
t = text("some sql")
50-
session.query(User).filter(t) # $ getSql=t
51-
session.query(User).group_by(User.id).having(t) # $ getSql=User.id MISSING: getSql=t
52-
session.query(User).group_by(t).first() # $ getSql=t
53-
session.query(User).order_by(t).first() # $ getSql=t
50+
session.query(User).filter(t)
51+
session.query(User).group_by(User.id).having(t)
52+
session.query(User).group_by(t).first()
53+
session.query(User).order_by(t).first()
5454

5555
query = select(User).where(User.name == t) # $ MISSING: getSql=t
5656
with engine.connect() as conn:

0 commit comments

Comments
 (0)