Skip to content

Commit ca341bd

Browse files
authored
Merge pull request github#5612 from jty-team/jty/python/nosqlInjection
Python: CWE-943 - Add NoSQL injection query
2 parents 43355fe + e6ce10b commit ca341bd

19 files changed

+807
-0
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Passing user-controlled sources into NoSQL queries can result in a NoSQL injection flaw.
9+
This tainted NoSQL query containing a user-controlled source can then execute a malicious query in a NoSQL database such as MongoDB.
10+
In order for the user-controlled source to taint the NoSQL query, the user-controller source must be converted into a Python object using something like <code>json.loads</code> or <code>xmltodict.parse</code>.
11+
</p>
12+
<p>
13+
Because a user-controlled source is passed into the query, the malicious user can have complete control over the query itself.
14+
When the tainted query is executed, the malicious user can commit malicious actions such as bypassing role restrictions or accessing and modifying restricted data in the NoSQL database.
15+
</p>
16+
</overview>
17+
18+
<recommendation>
19+
<p>
20+
NoSQL injections can be prevented by escaping user-input's special characters that are passed into the NoSQL query from the user-supplied source.
21+
Alternatively, using a sanitize library such as MongoSanitizer will ensure that user-supplied sources can not act as a malicious query.
22+
</p>
23+
</recommendation>
24+
25+
<example>
26+
<p>In the example below, the user-supplied source is passed to a MongoDB function that queries the MongoDB database.</p>
27+
<sample src="examples/NoSQLInjection-bad.py" />
28+
<p> This can be fixed by using a sanitizer library like MongoSanitizer as shown in this annotated code version below.</p>
29+
<sample src="examples/NoSQLInjection-good.py" />
30+
</example>
31+
32+
<references>
33+
<li>Mongoengine: <a href="http://mongoengine.org/">Documentation</a>.</li>
34+
<li>Flask-Mongoengine: <a href="http://docs.mongoengine.org/projects/flask-mongoengine/en/latest/">Documentation</a>.</li>
35+
<li>PyMongo: <a href="https://pypi.org/project/pymongo/">Documentation</a>.</li>
36+
<li>Flask-PyMongo: <a href="https://flask-pymongo.readthedocs.io/en/latest/">Documentation</a>.</li>
37+
<li>OWASP: <a href="https://owasp.org/www-pdf-archive/GOD16-NOSQL.pdf">NoSQL Injection</a>.</li>
38+
<li>Security Stack Exchange Discussion: <a href="https://security.stackexchange.com/questions/83231/mongodb-nosql-injection-in-python-code">Question 83231</a>.</li>
39+
</references>
40+
</qhelp>
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* @name NoSQL Injection
3+
* @description Building a NoSQL query from user-controlled sources is vulnerable to insertion of
4+
* malicious NoSQL code by the user.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @id py/nosql-injection
8+
* @tags experimental
9+
* security
10+
* external/cwe/cwe-943
11+
*/
12+
13+
import python
14+
import experimental.semmle.python.security.injection.NoSQLInjection
15+
16+
from CustomPathNode source, CustomPathNode sink
17+
where noSQLInjectionFlow(source, sink)
18+
select sink, source, sink, "$@ NoSQL query contains an unsanitized $@", sink, "This", source,
19+
"user-provided value"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from flask import Flask, request
2+
from flask_pymongo import PyMongo
3+
import json
4+
5+
mongo = PyMongo(app)
6+
7+
8+
@app.route("/")
9+
def home_page():
10+
unsanitized_search = request.args['search']
11+
json_search = json.loads(unsanitized_search)
12+
13+
result = mongo.db.user.find({'name': json_search})
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
from flask import Flask, request
2+
from flask_pymongo import PyMongo
3+
from mongosanitizer.sanitizer import sanitize
4+
import json
5+
6+
mongo = PyMongo(app)
7+
8+
9+
@app.route("/")
10+
def home_page():
11+
unsafe_search = request.args['search']
12+
json_search = json.loads(unsafe_search)
13+
safe_search = sanitize(unsanitized_search)
14+
15+
result = client.db.collection.find_one({'data': safe_search})

python/ql/src/experimental/semmle/python/Concepts.qll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,61 @@ class SQLEscape extends DataFlow::Node {
209209
*/
210210
DataFlow::Node getAnInput() { result = range.getAnInput() }
211211
}
212+
213+
/** Provides a class for modeling NoSQL execution APIs. */
214+
module NoSQLQuery {
215+
/**
216+
* A data-flow node that executes NoSQL queries.
217+
*
218+
* Extend this class to model new APIs. If you want to refine existing API models,
219+
* extend `NoSQLQuery` instead.
220+
*/
221+
abstract class Range extends DataFlow::Node {
222+
/** Gets the argument that specifies the NoSQL query to be executed. */
223+
abstract DataFlow::Node getQuery();
224+
}
225+
}
226+
227+
/**
228+
* A data-flow node that executes NoSQL queries.
229+
*
230+
* Extend this class to refine existing API models. If you want to model new APIs,
231+
* extend `NoSQLQuery::Range` instead.
232+
*/
233+
class NoSQLQuery extends DataFlow::Node {
234+
NoSQLQuery::Range range;
235+
236+
NoSQLQuery() { this = range }
237+
238+
/** Gets the argument that specifies the NoSQL query to be executed. */
239+
DataFlow::Node getQuery() { result = range.getQuery() }
240+
}
241+
242+
/** Provides classes for modeling NoSQL sanitization-related APIs. */
243+
module NoSQLSanitizer {
244+
/**
245+
* A data-flow node that collects functions sanitizing NoSQL queries.
246+
*
247+
* Extend this class to model new APIs. If you want to refine existing API models,
248+
* extend `NoSQLSanitizer` instead.
249+
*/
250+
abstract class Range extends DataFlow::Node {
251+
/** Gets the argument that specifies the NoSQL query to be sanitized. */
252+
abstract DataFlow::Node getAnInput();
253+
}
254+
}
255+
256+
/**
257+
* A data-flow node that collects functions sanitizing NoSQL queries.
258+
*
259+
* Extend this class to model new APIs. If you want to refine existing API models,
260+
* extend `NoSQLSanitizer::Range` instead.
261+
*/
262+
class NoSQLSanitizer extends DataFlow::Node {
263+
NoSQLSanitizer::Range range;
264+
265+
NoSQLSanitizer() { this = range }
266+
267+
/** Gets the argument that specifies the NoSQL query to be sanitized. */
268+
DataFlow::Node getAnInput() { result = range.getAnInput() }
269+
}

python/ql/src/experimental/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44

55
private import experimental.semmle.python.frameworks.Stdlib
66
private import experimental.semmle.python.frameworks.LDAP
7+
private import experimental.semmle.python.frameworks.NoSQL
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the standard libraries.
3+
* Note: some modeling is done internally in the dataflow/taint tracking implementation.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.dataflow.new.RemoteFlowSources
10+
private import experimental.semmle.python.Concepts
11+
private import semmle.python.ApiGraphs
12+
13+
private module NoSQL {
14+
// API Nodes returning `Mongo` instances.
15+
/** Gets a reference to `pymongo.MongoClient` */
16+
private API::Node pyMongo() {
17+
result = API::moduleImport("pymongo").getMember("MongoClient").getReturn()
18+
}
19+
20+
/** Gets a reference to `flask_pymongo.PyMongo` */
21+
private API::Node flask_PyMongo() {
22+
result = API::moduleImport("flask_pymongo").getMember("PyMongo").getReturn()
23+
}
24+
25+
/** Gets a reference to `mongoengine` */
26+
private API::Node mongoEngine() { result = API::moduleImport("mongoengine") }
27+
28+
/** Gets a reference to `flask_mongoengine.MongoEngine` */
29+
private API::Node flask_MongoEngine() {
30+
result = API::moduleImport("flask_mongoengine").getMember("MongoEngine").getReturn()
31+
}
32+
33+
/**
34+
* Gets a reference to an initialized `Mongo` instance.
35+
* See `pyMongo()`, `flask_PyMongo()`
36+
*/
37+
private API::Node mongoInstance() {
38+
result = pyMongo() or
39+
result = flask_PyMongo()
40+
}
41+
42+
/**
43+
* Gets a reference to an initialized `Mongo` DB instance.
44+
* See `mongoEngine()`, `flask_MongoEngine()`
45+
*/
46+
private API::Node mongoDBInstance() {
47+
result = mongoEngine().getMember(["get_db", "connect"]).getReturn() or
48+
result = mongoEngine().getMember("connection").getMember(["get_db", "connect"]).getReturn() or
49+
result = flask_MongoEngine().getMember("get_db").getReturn()
50+
}
51+
52+
/**
53+
* Gets a reference to a `Mongo` DB use.
54+
*
55+
* See `mongoInstance()`, `mongoDBInstance()`.
56+
*/
57+
private DataFlow::LocalSourceNode mongoDB(DataFlow::TypeTracker t) {
58+
t.start() and
59+
(
60+
exists(SubscriptNode subscript |
61+
subscript.getObject() = mongoInstance().getAUse().asCfgNode() and
62+
result.asCfgNode() = subscript
63+
)
64+
or
65+
result.(DataFlow::AttrRead).getObject() = mongoInstance().getAUse()
66+
or
67+
result = mongoDBInstance().getAUse()
68+
)
69+
or
70+
exists(DataFlow::TypeTracker t2 | result = mongoDB(t2).track(t2, t))
71+
}
72+
73+
/**
74+
* Gets a reference to a `Mongo` DB use.
75+
*
76+
* ```py
77+
* from flask_pymongo import PyMongo
78+
* mongo = PyMongo(app)
79+
* mongo.db.user.find({'name': safe_search})
80+
* ```
81+
*
82+
* `mongo.db` would be a use of a `Mongo` instance, and so the result.
83+
*/
84+
private DataFlow::Node mongoDB() { mongoDB(DataFlow::TypeTracker::end()).flowsTo(result) }
85+
86+
/**
87+
* Gets a reference to a `Mongo` collection use.
88+
*
89+
* See `mongoDB()`.
90+
*/
91+
private DataFlow::LocalSourceNode mongoCollection(DataFlow::TypeTracker t) {
92+
t.start() and
93+
(
94+
exists(SubscriptNode subscript | result.asCfgNode() = subscript |
95+
subscript.getObject() = mongoDB().asCfgNode()
96+
)
97+
or
98+
result.(DataFlow::AttrRead).getObject() = mongoDB()
99+
)
100+
or
101+
exists(DataFlow::TypeTracker t2 | result = mongoCollection(t2).track(t2, t))
102+
}
103+
104+
/**
105+
* Gets a reference to a `Mongo` collection use.
106+
*
107+
* ```py
108+
* from flask_pymongo import PyMongo
109+
* mongo = PyMongo(app)
110+
* mongo.db.user.find({'name': safe_search})
111+
* ```
112+
*
113+
* `mongo.db.user` would be a use of a `Mongo` collection, and so the result.
114+
*/
115+
private DataFlow::Node mongoCollection() {
116+
mongoCollection(DataFlow::TypeTracker::end()).flowsTo(result)
117+
}
118+
119+
/** This class represents names of find_* relevant `Mongo` collection-level operation methods. */
120+
private class MongoCollectionMethodNames extends string {
121+
MongoCollectionMethodNames() {
122+
this in [
123+
"find", "find_raw_batches", "find_one", "find_one_and_delete", "find_and_modify",
124+
"find_one_and_replace", "find_one_and_update", "find_one_or_404"
125+
]
126+
}
127+
}
128+
129+
/**
130+
* Gets a reference to a `Mongo` collection method.
131+
*
132+
* ```py
133+
* from flask_pymongo import PyMongo
134+
* mongo = PyMongo(app)
135+
* mongo.db.user.find({'name': safe_search})
136+
* ```
137+
*
138+
* `mongo.db.user.find` would be a collection method, and so the result.
139+
*/
140+
private DataFlow::Node mongoCollectionMethod() {
141+
mongoCollection() = result.(DataFlow::AttrRead).getObject() and
142+
result.(DataFlow::AttrRead).getAttributeName() instanceof MongoCollectionMethodNames
143+
}
144+
145+
/**
146+
* Gets a reference to a `Mongo` collection method call
147+
*
148+
* ```py
149+
* from flask_pymongo import PyMongo
150+
* mongo = PyMongo(app)
151+
* mongo.db.user.find({'name': safe_search})
152+
* ```
153+
*
154+
* `mongo.db.user.find({'name': safe_search})` would be a collection method call, and so the result.
155+
*/
156+
private class MongoCollectionCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
157+
MongoCollectionCall() { this.getFunction() = mongoCollectionMethod() }
158+
159+
override DataFlow::Node getQuery() { result = this.getArg(0) }
160+
}
161+
162+
/**
163+
* Gets a reference to a call from a class whose base is a reference to `mongoEngine()` or `flask_MongoEngine()`'s
164+
* `Document` or `EmbeddedDocument` objects and its attribute is `objects`.
165+
*
166+
* ```py
167+
* from flask_mongoengine import MongoEngine
168+
* db = MongoEngine(app)
169+
* class Movie(db.Document):
170+
* title = db.StringField(required=True)
171+
*
172+
* Movie.objects(__raw__=json_search)
173+
* ```
174+
*
175+
* `Movie.objects(__raw__=json_search)` would be the result.
176+
*/
177+
private class MongoEngineObjectsCall extends DataFlow::CallCfgNode, NoSQLQuery::Range {
178+
MongoEngineObjectsCall() {
179+
this =
180+
[mongoEngine(), flask_MongoEngine()]
181+
.getMember(["Document", "EmbeddedDocument"])
182+
.getASubclass()
183+
.getMember("objects")
184+
.getACall()
185+
}
186+
187+
override DataFlow::Node getQuery() { result = this.getArgByName(_) }
188+
}
189+
190+
/** Gets a reference to `mongosanitizer.sanitizer.sanitize` */
191+
private class MongoSanitizerCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
192+
MongoSanitizerCall() {
193+
this =
194+
API::moduleImport("mongosanitizer").getMember("sanitizer").getMember("sanitize").getACall()
195+
}
196+
197+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
198+
}
199+
200+
/**
201+
* ObjectId returns a string representing an id.
202+
* If at any time ObjectId can't parse it's input (like when a tainted dict in passed in),
203+
* then ObjectId will throw an error preventing the query from running.
204+
*/
205+
private class BsonObjectIdCall extends DataFlow::CallCfgNode, NoSQLSanitizer::Range {
206+
BsonObjectIdCall() {
207+
this =
208+
API::moduleImport(["bson", "bson.objectid", "bson.json_util"])
209+
.getMember("ObjectId")
210+
.getACall()
211+
}
212+
213+
override DataFlow::Node getAnInput() { result = this.getArg(0) }
214+
}
215+
}

0 commit comments

Comments
 (0)