Skip to content

Commit 85f5ad2

Browse files
authored
Merge pull request github#2904 from RasmusWL/python-http-clients
Python: Model outgoing HTTP client requests
2 parents dce121b + 4330d4e commit 85f5ad2

File tree

16 files changed

+267
-2
lines changed

16 files changed

+267
-2
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import semmle.python.web.client.StdLib
2+
import semmle.python.web.client.Requests

python/ql/src/semmle/python/web/Http.qll

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,17 +89,59 @@ abstract class CookieSet extends CookieOperation {}
8989
/** Generic taint sink in an HTTP response; it accepts taint of any `ExternalStringKind`. */
9090
abstract class HttpResponseTaintSink extends TaintSink {
9191

92-
override predicate sinks(TaintKind kind) {
92+
override predicate sinks(TaintKind kind) {
9393
kind instanceof ExternalStringKind
9494
}
9595

9696
}
9797

9898
/**
 * Generic taint sink for an HTTP redirect (going by the class name; the concrete
 * sinks are defined by subclasses). It accepts taint of any `ExternalStringKind`.
 */
abstract class HttpRedirectTaintSink extends TaintSink {
9999

100-
override predicate sinks(TaintKind kind) {
100+
override predicate sinks(TaintKind kind) {
101101
kind instanceof ExternalStringKind
102102
}
103103

104104
}
105105

106+
/** Modelling of outgoing (client-side) HTTP requests. */
module Client {
    // TODO: model user input that reaches parts of the request other than the URL:
    // - `data`, `json` for `requests.post`
    // - `body` for `HTTPConnection.request`
    // - headers?

    // TODO: Add more library support
    // - urllib3 https://github.com/urllib3/urllib3
    // - httpx https://github.com/encode/httpx

    /**
     * An outgoing HTTP request.
     *
     * For example:
     *     conn = HTTPConnection('example.com')
     *     conn.request('GET', '/path')
     */
    abstract class HttpRequest extends ControlFlowNode {
        /**
         * Gets any control-flow node that is used to construct the final URL.
         *
         * In the `HTTPConnection` example above, there is a result for both
         * `'example.com'` and `'/path'`.
         */
        abstract ControlFlowNode getAUrlPart();

        /**
         * Gets the HTTP method of this request in upper case, for example `GET`.
         *
         * Implementations may have no result when the method cannot be determined.
         */
        abstract string getMethodUpper();
    }

    /** A taint sink for any URL part of an outgoing HTTP request. */
    class HttpRequestUrlTaintSink extends TaintSink {
        HttpRequestUrlTaintSink() { exists(HttpRequest request | this = request.getAUrlPart()) }

        override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
    }
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/**
2+
* Modeling outgoing HTTP requests using the `requests` package
3+
* https://pypi.org/project/requests/
4+
*/
5+
6+
import python
7+
private import semmle.python.web.Http
8+
9+
/**
 * An outgoing HTTP request made by calling a module-level function of the
 * `requests` package whose name is an HTTP verb, for example `requests.get(...)`.
 *
 * The accepted verb names are whatever `httpVerbLower()` yields; that predicate is
 * defined outside this file.
 */
class RequestsHttpRequest extends Client::HttpRequest, CallNode {
    /** The `requests.<verb>` function being called. */
    CallableValue func;
    /** The lower-case verb, which is also the name of the called function. */
    string method;

    RequestsHttpRequest() {
        this = func.getACall() and
        func = Module::named("requests").attr(method) and
        method = httpVerbLower()
    }

    /** Gets the argument bound to the `url` parameter of the called function. */
    override ControlFlowNode getAUrlPart() {
        result = func.getNamedArgumentForCall(this, "url")
    }

    /** Gets the verb in upper case, derived from the name of the called function. */
    override string getMethodUpper() { result = method.toUpperCase() }
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import python
2+
private import semmle.python.web.Http
3+
4+
/**
 * Gets the class value of `HTTPConnection` or `HTTPSConnection` as exposed by any of
 * the supported modules: `httplib`, `http.client` or `six.moves.http_client`.
 */
ClassValue httpConnectionClass() {
    exists(string moduleName, string className |
        (
            // Python 2
            moduleName = "httplib"
            or
            // Python 3
            moduleName = "http.client"
            or
            // six
            moduleName = "six.moves.http_client"
        ) and
        (className = "HTTPConnection" or className = "HTTPSConnection") and
        result = Value::named(moduleName + "." + className)
    )
}
20+
21+
/**
 * An outgoing HTTP request made by calling `request` on an object constructed from
 * one of the classes given by `httpConnectionClass()`.
 *
 * For example:
 *     conn = HTTPConnection('example.com')
 *     conn.request('GET', '/path')
 */
class HttpConnectionHttpRequest extends Client::HttpRequest, CallNode {
    /** The call that constructed the connection object `request` is invoked on. */
    CallNode constructor_call;
    /** The `request` method looked up on the connection class. */
    CallableValue func;

    HttpConnectionHttpRequest() {
        exists(ClassValue connectionClass, AttrNode requestAttr, Value connectionValue |
            connectionClass = httpConnectionClass() and
            func = connectionClass.lookup("request") and
            this = func.getACall() and
            // The bound method may be stored before it is called
            // (`r = conn.request; r('GET', path)`), so find the attribute access
            // that the called function originates from.
            this.getFunction().pointsTo(_, _, requestAttr) and
            // `HTTPSConnection` is a subtype of `HTTPConnection` and
            // `HTTPSConnection.request == HTTPConnection.request`, so up to here
            // `connectionClass` could be either class. Requiring it to be the class
            // that was actually instantiated avoids reporting each request twice.
            requestAttr.getObject().pointsTo(_, connectionValue, constructor_call) and
            connectionClass = connectionValue.getClass() and
            constructor_call = connectionClass.getACall()
        )
    }

    /**
     * Gets a node contributing to the URL: the `url` argument of the `request` call,
     * or the host given to the constructor (first positional argument or `host=`).
     */
    override ControlFlowNode getAUrlPart() {
        result = constructor_call.getArgByName("host")
        or
        result = constructor_call.getArg(0)
        or
        result = func.getNamedArgumentForCall(this, "url")
    }

    /**
     * Gets the upper-cased string that the `method` argument of the `request` call
     * points to. There is no result when the argument does not point to a string.
     */
    override string getMethodUpper() {
        result =
            any(string verb |
                func.getNamedArgumentForCall(this, "method").pointsTo(Value::forString(verb))
            ).toUpperCase()
    }
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
| test.py:3:1:3:27 | ControlFlowNode for Attribute() | test.py:3:14:3:26 | ControlFlowNode for Str | GET |
2+
| test.py:4:1:4:28 | ControlFlowNode for Attribute() | test.py:4:15:4:27 | ControlFlowNode for Str | POST |
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import python
2+
3+
import semmle.python.web.Http
4+
import semmle.python.web.ClientHttpRequest
5+
6+
// Lists every modelled outgoing HTTP request together with each of its URL parts and
// its upper-cased method; requests without a known method get a placeholder instead.
from Client::HttpRequest req, string method
where
    method = req.getMethodUpper()
    or
    not exists(req.getMethodUpper()) and
    method = "<NO METHOD>"
select req, req.getAUrlPart(), method
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: -p ../../../../query-tests/Security/lib/ --max-import-depth=1
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
import requests
2+
3+
requests.get('example.com')  # expected: reported as GET with this string as URL part
4+
requests.post('example.com')  # expected: reported as POST with this string as URL part
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:5:27:5:39 | ControlFlowNode for Str | GET |
2+
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:6:25:6:31 | ControlFlowNode for Str | GET |
3+
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:10:28:10:40 | ControlFlowNode for Str | POST |
4+
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:15:26:15:32 | ControlFlowNode for Str | POST |
5+
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:19:27:19:39 | ControlFlowNode for Str | <NO METHOD> |
6+
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:20:26:20:32 | ControlFlowNode for Str | <NO METHOD> |
7+
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:28:27:28:30 | ControlFlowNode for fake | GET |
8+
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:30:25:30:31 | ControlFlowNode for Str | GET |
9+
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:35:27:35:39 | ControlFlowNode for Str | HEAD |
10+
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:37:22:37:28 | ControlFlowNode for Str | HEAD |
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import python
2+
3+
import semmle.python.web.Http
4+
import semmle.python.web.ClientHttpRequest
5+
6+
// Lists every modelled outgoing HTTP request together with each of its URL parts and
// its upper-cased method; requests without a known method get a placeholder instead.
from Client::HttpRequest req, string method
where
    method = req.getMethodUpper()
    or
    not exists(req.getMethodUpper()) and
    method = "<NO METHOD>"
select req, req.getAUrlPart(), method

0 commit comments

Comments
 (0)