Skip to content

Commit cd5399d

Browse files
committed
Python: Model outgoing http client requests
1 parent db33c36 commit cd5399d

File tree

18 files changed

+314
-2
lines changed

18 files changed

+314
-2
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import semmle.python.web.client.StdLib
2+
import semmle.python.web.client.Requests

python/ql/src/semmle/python/web/Http.qll

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,17 +89,59 @@ abstract class CookieSet extends CookieOperation {}
8989
/** Generic taint sink in a http response */
9090
abstract class HttpResponseTaintSink extends TaintSink {
9191

92-
override predicate sinks(TaintKind kind) {
92+
override predicate sinks(TaintKind kind) {
9393
kind instanceof ExternalStringKind
9494
}
9595

9696
}
9797

9898
abstract class HttpRedirectTaintSink extends TaintSink {
9999

100-
override predicate sinks(TaintKind kind) {
100+
override predicate sinks(TaintKind kind) {
101101
kind instanceof ExternalStringKind
102102
}
103103

104104
}
105105

106+
module Client {
107+
108+
// TODO: model user input in places other than the URL:
109+
// - `data`, `json` for `requests.post`
110+
// - `body` for `HTTPConnection.request`
111+
// - headers?
112+
113+
// TODO: Add more library support
114+
// - urllib3 https://github.com/urllib3/urllib3
115+
// - httpx https://github.com/encode/httpx
116+
117+
/**
118+
* An outgoing HTTP request.
119+
*
120+
* For example, the `conn.request` call in:
121+
*     conn = HTTPConnection('example.com')
122+
*     conn.request('GET', '/path')
123+
*/
124+
abstract class HttpRequest extends CallNode {
125+
126+
/** Gets any ControlFlowNode that is used to construct the final URL.
127+
*
128+
* In the HTTPConnection example, there is a result for both `'example.com'` and for `'/path'`.
129+
*/
130+
abstract ControlFlowNode getAUrlPart();
131+
132+
/**
* Gets the HTTP method of this request in upper case, for example `GET`.
*
* There may be no result when the method is not known.
*/
abstract string getMethodUpper();
133+
}
134+
135+
/** Taint sink for the URL part of an outgoing HTTP request. */
136+
class HttpRequestUrlTaintSink extends TaintSink {
137+
138+
HttpRequestUrlTaintSink() {
139+
// Every node that some `HttpRequest` reports as a URL part is a sink.
this = any(HttpRequest r).getAUrlPart()
140+
}
141+
142+
/** Holds if `kind` is an `ExternalStringKind`; no other taint kind is accepted by this sink. */
override predicate sinks(TaintKind kind) {
143+
kind instanceof ExternalStringKind
144+
}
145+
146+
}
147+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/**
2+
* Modeling outgoing HTTP requests using the `requests` package
3+
* https://pypi.org/project/requests/
4+
*/
5+
6+
import python
7+
private import semmle.python.web.Http
8+
9+
/**
 * A call to a function of the `requests` module whose name is one of the
 * values of `httpVerbLower()`.
 *
 * NOTE(review): `httpVerbLower()` is defined outside this file; presumably it
 * yields lower-case verb names such as `get` and `post` - confirm.
 */
class RequestsHttpRequest extends Client::HttpRequest {
10+
// The `requests.<method>` callable that this call invokes.
CallableValue func;
11+
// The attribute name that was looked up on the `requests` module.
string method;
12+
13+
RequestsHttpRequest() {
14+
method = httpVerbLower() and
15+
func = Module::named("requests").attr(method).(CallableValue) and
16+
this = func.getACall()
17+
}
18+
19+
/** Gets the argument of this call that is bound to the parameter named `url`. */
override ControlFlowNode getAUrlPart() { result = func.getNamedArgumentForCall(this, "url") }
20+
21+
/** Gets the upper-cased name of the `requests` function that is called. */
override string getMethodUpper() { result = method.toUpperCase() }
22+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import python
2+
private import semmle.python.web.Http
3+
4+
/**
 * Gets the class value of `HTTPConnection` or `HTTPSConnection` from
 * `httplib`, `http.client`, or `six.moves.http_client`.
 */
ClassValue httpConnectionClass() {
5+
// Python 2
6+
result = Value::named("httplib.HTTPConnection")
7+
or
8+
result = Value::named("httplib.HTTPSConnection")
9+
or
10+
// Python 3
11+
result = Value::named("http.client.HTTPConnection")
12+
or
13+
result = Value::named("http.client.HTTPSConnection")
14+
or
15+
// six
16+
result = Value::named("six.moves.http_client.HTTPConnection")
17+
or
18+
result = Value::named("six.moves.http_client.HTTPSConnection")
19+
}
20+
21+
/**
 * A call to the `request` member of one of the classes given by
 * `httpConnectionClass()`, paired with the constructor call that created the
 * connection object the member was read from.
 */
class HttpConnectionHttpRequest extends Client::HttpRequest {
22+
// The call that constructed the connection object.
CallNode constructor_call;
23+
// The `request` member looked up on the connection class.
CallableValue func;
24+
25+
HttpConnectionHttpRequest() {
26+
exists(ClassValue cls, AttrNode call_origin, Value constructor_call_value |
27+
cls = httpConnectionClass() and
28+
func = cls.lookup("request") and
29+
this = func.getACall() and
30+
// Find the attribute node the called function comes from, rather than requiring the
// call itself to be written as `conn.request(...)`; this also covers
// `req_meth = conn.request; req_meth(...)`.
// NOTE(review): relies on the third argument of `pointsTo` being the origin node - confirm.
this.getFunction().pointsTo(_, _, call_origin) and
31+
// The object the attribute was read from must originate from `constructor_call` ...
call_origin.getObject().pointsTo(_, constructor_call_value, constructor_call) and
32+
// ... its value must have class `cls` ...
cls = constructor_call_value.getClass() and
33+
// ... and `constructor_call` must itself be a call to `cls`.
constructor_call = cls.getACall()
34+
)
35+

36+
}
37+
38+
/**
* Gets the `url` argument of the `request` call, or the host given to the
* constructor call (its first positional argument or its `host` keyword argument).
*/
override ControlFlowNode getAUrlPart() {
39+
result = func.getNamedArgumentForCall(this, "url")
40+
or
41+
result = constructor_call.getArg(0)
42+
or
43+
result = constructor_call.getArgByName("host")
44+
}
45+
46+
/**
* Gets the upper-cased string that the `method` argument of the `request` call
* points to. There is no result when the argument does not point to a string value.
*/
override string getMethodUpper() {
47+
exists(string method |
48+
result = method.toUpperCase() and
49+
func.getNamedArgumentForCall(this, "method").pointsTo(Value::forString(method))
50+
)
51+
}
52+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:5:27:5:39 | ControlFlowNode for Str | GET |
2+
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:6:25:6:31 | ControlFlowNode for Str | GET |
3+
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:10:28:10:40 | ControlFlowNode for Str | POST |
4+
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:15:26:15:32 | ControlFlowNode for Str | POST |
5+
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:19:27:19:39 | ControlFlowNode for Str | <NO METHOD> |
6+
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:20:26:20:32 | ControlFlowNode for Str | <NO METHOD> |
7+
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:28:27:28:30 | ControlFlowNode for fake | GET |
8+
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:30:25:30:31 | ControlFlowNode for Str | GET |
9+
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:35:27:35:39 | ControlFlowNode for Str | HEAD |
10+
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:37:22:37:28 | ControlFlowNode for Str | HEAD |
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import python
2+
3+
import semmle.python.web.Http
4+
import semmle.python.web.ClientHttpRequest
5+
6+
// Selects every modeled outgoing HTTP request together with each of its URL parts
// and its upper-case method; "<NO METHOD>" is reported when no method is known.
from Client::HttpRequest req, string method
7+
where
8+
if exists(req.getMethodUpper())
9+
then method = req.getMethodUpper()
10+
else method = "<NO METHOD>"
11+
select req, req.getAUrlPart(), method
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from httplib import HTTPConnection, HTTPSConnection
2+
3+
4+
def basic():
5+
conn = HTTPConnection('example.com')
6+
conn.request('GET', '/path')
7+
8+
9+
def indirect_caller():
10+
conn = HTTPSConnection('example.com')
11+
indirect_callee(conn)
12+
13+
14+
def indirect_callee(conn):
15+
conn.request('POST', '/path')
16+
17+
18+
def method_not_known(method):
19+
conn = HTTPConnection('example.com')
20+
conn.request(method, '/path')
21+
22+
23+
def sneaky_setting_host():
24+
# We don't handle that the host is overwritten directly.
25+
# A contrived example; you're not supposed to do this, but you certainly can.
26+
fake = 'fakehost.com'
27+
real = 'realhost.com'
28+
conn = HTTPConnection(fake)
29+
conn.host = real
30+
conn.request('GET', '/path')
31+
32+
33+
def tricky_not_attribute_node():
34+
# A contrived example; you're not supposed to do this, but you certainly can.
35+
conn = HTTPConnection('example.com')
36+
req_meth = conn.request
37+
req_meth('HEAD', '/path')
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:5:27:5:39 | ControlFlowNode for Str | GET |
2+
| test.py:6:5:6:32 | ControlFlowNode for Attribute() | test.py:6:25:6:31 | ControlFlowNode for Str | GET |
3+
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:10:28:10:40 | ControlFlowNode for Str | POST |
4+
| test.py:15:5:15:33 | ControlFlowNode for Attribute() | test.py:15:26:15:32 | ControlFlowNode for Str | POST |
5+
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:19:27:19:39 | ControlFlowNode for Str | <NO METHOD> |
6+
| test.py:20:5:20:33 | ControlFlowNode for Attribute() | test.py:20:26:20:32 | ControlFlowNode for Str | <NO METHOD> |
7+
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:28:27:28:30 | ControlFlowNode for fake | GET |
8+
| test.py:30:5:30:32 | ControlFlowNode for Attribute() | test.py:30:25:30:31 | ControlFlowNode for Str | GET |
9+
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:35:27:35:39 | ControlFlowNode for Str | HEAD |
10+
| test.py:37:5:37:29 | ControlFlowNode for req_meth() | test.py:37:22:37:28 | ControlFlowNode for Str | HEAD |
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import python
2+
3+
import semmle.python.web.Http
4+
import semmle.python.web.ClientHttpRequest
5+
6+
// Selects every modeled outgoing HTTP request together with each of its URL parts
// and its upper-case method; "<NO METHOD>" is reported when no method is known.
from Client::HttpRequest req, string method
7+
where
8+
if exists(req.getMethodUpper())
9+
then method = req.getMethodUpper()
10+
else method = "<NO METHOD>"
11+
select req, req.getAUrlPart(), method
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from http.client import HTTPConnection, HTTPSConnection
2+
3+
4+
def basic():
5+
conn = HTTPConnection('example.com')
6+
conn.request('GET', '/path')
7+
8+
9+
def indirect_caller():
10+
conn = HTTPSConnection('example.com')
11+
indirect_callee(conn)
12+
13+
14+
def indirect_callee(conn):
15+
conn.request('POST', '/path')
16+
17+
18+
def method_not_known(method):
19+
conn = HTTPConnection('example.com')
20+
conn.request(method, '/path')
21+
22+
23+
def sneaky_setting_host():
24+
# We don't handle that the host is overwritten directly.
25+
# A contrived example; you're not supposed to do this, but you certainly can.
26+
fake = 'fakehost.com'
27+
real = 'realhost.com'
28+
conn = HTTPConnection(fake)
29+
conn.host = real
30+
conn.request('GET', '/path')
31+
32+
33+
def tricky_not_attribute_node():
34+
# A contrived example; you're not supposed to do this, but you certainly can.
35+
conn = HTTPConnection('example.com')
36+
req_meth = conn.request
37+
req_meth('HEAD', '/path')

0 commit comments

Comments
 (0)