Skip to content

Commit 4b1f6f0

Browse files
authored
Merge pull request #10629 from RasmusWL/fix-flask-source
Python: Fix flask request modeling
2 parents 5cadd3c + 60527df commit 4b1f6f0

File tree

28 files changed

+583
-259
lines changed

28 files changed

+583
-259
lines changed

python/ql/lib/semmle/python/frameworks/Flask.qll

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -354,13 +354,7 @@ module Flask {
354354
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
355355
*/
356356
private class FlaskRequestSource extends RemoteFlowSource::Range {
357-
FlaskRequestSource() {
358-
this = request().getAValueReachableFromSource() and
359-
not any(Import imp).contains(this.asExpr()) and
360-
not exists(ControlFlowNode def | this.asVar().getSourceVariable().hasDefiningNode(def) |
361-
any(Import imp).contains(def.getNode())
362-
)
363-
}
357+
FlaskRequestSource() { this = request().asSource() }
364358

365359
override string getSourceType() { result = "flask.request" }
366360
}

python/ql/lib/semmle/python/frameworks/Tornado.qll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ private import semmle.python.frameworks.Stdlib
1414
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
1515

1616
/**
17+
* INTERNAL: Do not use.
18+
*
1719
* Provides models for the `tornado` PyPI package.
1820
* See https://www.tornadoweb.org/en/stable/.
1921
*/
20-
private module Tornado {
22+
module Tornado {
2123
/**
2224
* Provides models for the `tornado.httputil.HTTPHeaders` class
2325
*
@@ -126,8 +128,7 @@ private module Tornado {
126128
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
127129

128130
/** The `self` parameter in a method on the `tornado.web.RequestHandler` class or any subclass. */
129-
private class SelfParam extends InstanceSource, RemoteFlowSource::Range,
130-
DataFlow::ParameterNode {
131+
class SelfParam extends InstanceSource, RemoteFlowSource::Range, DataFlow::ParameterNode {
131132
SelfParam() {
132133
exists(RequestHandlerClass cls | cls.getAMethod().getArg(0) = this.getParameter())
133134
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: fix
3+
---
4+
* Fixed how `flask.request` is modeled as a RemoteFlowSource, such that we show fewer duplicated alert messages for Code Scanning alerts. The import, such as `from flask import request`, will now be shown as the first step in a path explanation.

python/ql/src/experimental/Security/CWE-348/ClientSuppliedIpUsedInSecurityCheckLib.qll

Lines changed: 33 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,57 @@
11
private import python
22
private import semmle.python.Concepts
33
private import semmle.python.ApiGraphs
4-
private import semmle.python.dataflow.new.RemoteFlowSources
4+
private import semmle.python.frameworks.Flask
5+
private import semmle.python.frameworks.Django
6+
private import semmle.python.frameworks.Tornado
57

68
/**
79
* A data flow source of the client ip obtained according to the remote endpoint identifier specified
810
* (`X-Forwarded-For`, `X-Real-IP`, `Proxy-Client-IP`, etc.) in the header.
911
*
1012
* For example: `request.headers.get("X-Forwarded-For")`.
13+
*
14+
* A call to `request.headers.get` or `request.headers.get_all` or `request.headers.getlist`.
1115
*/
12-
abstract class ClientSuppliedIpUsedInSecurityCheck extends DataFlow::CallCfgNode { }
16+
abstract class ClientSuppliedIpUsedInSecurityCheck extends DataFlow::Node { }
1317

14-
private class FlaskClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
18+
private class FlaskClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck,
19+
DataFlow::MethodCallNode {
1520
FlaskClientSuppliedIpUsedInSecurityCheck() {
16-
exists(RemoteFlowSource rfs, DataFlow::AttrRead get |
17-
rfs.getSourceType() = "flask.request" and this.getFunction() = get
18-
|
19-
// `get` is a call to request.headers.get or request.headers.get_all or request.headers.getlist
20-
// request.headers
21-
get.getObject()
22-
.(DataFlow::AttrRead)
23-
// request
24-
.getObject()
25-
.getALocalSource() = rfs and
26-
get.getAttributeName() in ["get", "get_all", "getlist"] and
27-
get.getObject().(DataFlow::AttrRead).getAttributeName() = "headers" and
28-
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
29-
)
21+
this = Flask::request().getMember("headers").getMember(["get", "get_all", "getlist"]).getACall() and
22+
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
3023
}
3124
}
3225

33-
private class DjangoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
26+
private class DjangoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck,
27+
DataFlow::MethodCallNode {
3428
DjangoClientSuppliedIpUsedInSecurityCheck() {
35-
exists(RemoteFlowSource rfs, DataFlow::AttrRead get |
36-
rfs.getSourceType() = "django.http.request.HttpRequest" and this.getFunction() = get
37-
|
38-
// `get` is a call to request.headers.get or request.META.get
39-
// request.headers
40-
get.getObject()
41-
.(DataFlow::AttrRead)
42-
// request
43-
.getObject()
44-
.getALocalSource() = rfs and
45-
get.getAttributeName() = "get" and
46-
get.getObject().(DataFlow::AttrRead).getAttributeName() in ["headers", "META"] and
47-
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
48-
)
29+
exists(DataFlow::Node req, DataFlow::AttrRead headers |
30+
// a call to request.headers.get or request.META.get
31+
req = PrivateDjango::DjangoImpl::DjangoHttp::Request::HttpRequest::instance() and
32+
headers.getObject().getALocalSource() = req and
33+
headers.getAttributeName() in ["headers", "META"] and
34+
this.calls(headers, "get")
35+
) and
36+
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
4937
}
5038
}
5139

52-
private class TornadoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck {
40+
private class TornadoClientSuppliedIpUsedInSecurityCheck extends ClientSuppliedIpUsedInSecurityCheck,
41+
DataFlow::MethodCallNode {
5342
TornadoClientSuppliedIpUsedInSecurityCheck() {
54-
exists(RemoteFlowSource rfs, DataFlow::AttrRead get |
55-
rfs.getSourceType() = "tornado.web.RequestHandler" and this.getFunction() = get
43+
// a call to self.request.headers.get or self.request.headers.get_list inside a tornado requesthandler
44+
exists(
45+
Tornado::TornadoModule::Web::RequestHandler::SelfParam selfParam, DataFlow::AttrRead headers,
46+
DataFlow::AttrRead req
5647
|
57-
// `get` is a call to `rfs`.request.headers.get
58-
// `rfs`.request.headers
59-
get.getObject()
60-
.(DataFlow::AttrRead)
61-
// `rfs`.request
62-
.getObject()
63-
.(DataFlow::AttrRead)
64-
// `rfs`
65-
.getObject()
66-
.getALocalSource() = rfs and
67-
get.getAttributeName() in ["get", "get_list"] and
68-
get.getObject().(DataFlow::AttrRead).getAttributeName() = "headers" and
69-
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
70-
)
48+
req.getObject().getALocalSource() = selfParam and
49+
req.getAttributeName() = "request" and
50+
headers.getObject().getALocalSource() = req and
51+
headers.getAttributeName() = "headers" and
52+
this.calls(headers, ["get", "get_list"])
53+
) and
54+
this.getArg(0).asExpr().(StrConst).getText().toLowerCase() = clientIpParameterName()
7155
}
7256
}
7357

python/ql/src/meta/alerts/RemoteFlowSourcesReach.ql

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,15 @@ class RemoteFlowSourceReach extends TaintTracking::Configuration {
2525
}
2626

2727
override predicate isSink(DataFlow::Node node) {
28-
not node.getLocation().getFile() instanceof IgnoredFile and
29-
(
30-
node instanceof RemoteFlowSource
31-
or
32-
this.isAdditionalFlowStep(_, node)
33-
) and
34-
// In september 2021 we changed how we do taint-propagation for method calls (mostly
35-
// relating to modeled frameworks/libraries). We used to do `obj -> obj.meth` and
36-
// `obj.meth -> obj.meth()` in two separate steps, and now do them in one
37-
// `obj -> obj.meth()`. To be able to compare the overall reach between these two
38-
// version, we don't want this query to alert us to the fact that we no longer taint
39-
// the node in the middle (since that is just noise).
40-
// see https://github.com/github/codeql/pull/6349
28+
not node.getLocation().getFile() instanceof IgnoredFile
29+
// We could try to reduce the number of sinks in this configuration, by only
30+
// allowing something that is on one end of a localFlowStep, readStep or storeStep,
31+
// however, it's a brittle solution that requires us to remember to update this file
32+
// if/when adding something new to the data-flow library.
4133
//
42-
// We should be able to remove the following few lines of code once we don't care to
43-
// compare with the old (before September 2021) way of doing taint-propagation for
44-
// method calls.
45-
not exists(DataFlow::MethodCallNode c |
46-
node = c.getFunction() and
47-
this.isAdditionalFlowStep(c.getObject(), node) and
48-
this.isAdditionalFlowStep(node, c)
49-
)
34+
// From testing on a few projects, trying to reduce the number of nodes, we only
35+
// gain a reduction in the range of 40%, and while that's nice, it doesn't seem
36+
// worth it to me for a meta query.
5037
}
5138
}
5239

0 commit comments

Comments
 (0)