Skip to content

Commit 6f81685

Browse files
committed
Python: Add modeling of http.client.HTTPResponse
1 parent a5bae30 commit 6f81685

File tree

2 files changed

+233
-40
lines changed

2 files changed

+233
-40
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2091,6 +2091,187 @@ private module StdlibPrivate {
20912091
}
20922092
}
20932093

2094+
// ---------------------------------------------------------------------------
2095+
// http.client (Python 3)
2096+
// httplib (Python 2)
2097+
// ---------------------------------------------------------------------------
2098+
/**
2099+
* Provides models for the `http.client.HTTPConnection` and `HTTPSConnection` classes
2100+
*
2101+
* See
2102+
* - https://docs.python.org/3.10/library/http.client.html#http.client.HTTPConnection
2103+
* - https://docs.python.org/3.10/library/http.client.html#http.client.HTTPSConnection
2104+
* - https://docs.python.org/2.7/library/httplib.html#httplib.HTTPConnection
2105+
* - https://docs.python.org/2.7/library/httplib.html#httplib.HTTPSConnection
2106+
*/
2107+
module HTTPConnection {
2108+
/** Gets a reference to the `HTTPConnection` or `HTTPSConnection` class (from `http.client`, `httplib`, or `six.moves.http_client`). */
2109+
private API::Node classRef() {
2110+
exists(string className | className in ["HTTPConnection", "HTTPSConnection"] |
2111+
// Python 3
2112+
result = API::moduleImport("http").getMember("client").getMember(className)
2113+
or
2114+
// Python 2
2115+
result = API::moduleImport("httplib").getMember(className)
2116+
or
2117+
result =
2118+
API::moduleImport("six").getMember("moves").getMember("http_client").getMember(className)
2119+
)
2120+
}
2121+
2122+
/**
2123+
* A source of instances of `http.client.HTTPConnection`, extend this class to model new instances.
2124+
*
2125+
* This can include instantiations of the class, return values from function
2126+
* calls, or a special parameter that will be set when functions are called by an external
2127+
* library.
2128+
*
2129+
* Use the predicate `HTTPConnection::instance()` to get references to instances of `http.client.HTTPConnection`.
2130+
*/
2131+
abstract class InstanceSource extends DataFlow::LocalSourceNode {
2132+
/** Gets the argument that specified the host, if any. */
2133+
abstract DataFlow::Node getHostArgument();
2134+
}
2135+
2136+
/** A direct instantiation of `http.client.HTTPConnection`. */
2137+
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
2138+
ClassInstantiation() { this = classRef().getACall() }
2139+
2140+
override DataFlow::Node getHostArgument() {
2141+
result in [this.getArg(0), this.getArgByName("host")]
2142+
}
2143+
}
2144+
2145+
/**
2146+
* Gets a reference to an instance of `http.client.HTTPConnection`,
2147+
* that was instantiated with host argument `hostArg`.
2148+
*/
2149+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, DataFlow::Node hostArg) {
2150+
t.start() and
2151+
hostArg = result.(InstanceSource).getHostArgument()
2152+
or
2153+
exists(DataFlow::TypeTracker t2 | result = instance(t2, hostArg).track(t2, t))
2154+
}
2155+
2156+
/**
2157+
* Gets a reference to an instance of `http.client.HTTPConnection`,
2158+
* that was instantiated with host argument `hostArg`.
2159+
*/
2160+
DataFlow::Node instance(DataFlow::Node hostArg) {
2161+
instance(DataFlow::TypeTracker::end(), hostArg).flowsTo(result)
2162+
}
2163+
2164+
/** A method call on an `HTTPConnection` instance that sends off a request. */
2165+
private class RequestCall extends HTTP::Client::Request::Range, DataFlow::MethodCallNode {
2166+
RequestCall() { this.calls(instance(_), ["request", "_send_request", "putrequest"]) }
2167+
2168+
override DataFlow::Node getResponse() {
2169+
// TODO: this does not seem like the right abstraction, to allow for nice path-explanations
2170+
//
2171+
// For nice path-explanation, we would like either
2172+
// 1: tainting instance
2173+
// 1a. host on object creation -> obj
2174+
// 1b. url on request call -> obj
2175+
// 2. obj -> obj.getresponse()
2176+
//
2177+
// For now, that's really all we use the `getResponse` predicate for.
2178+
result.(HttpConnectionGetResponseCall).getObject().getALocalSource() =
2179+
this.getObject().getALocalSource()
2180+
}
2181+
2182+
override DataFlow::Node getUrl() {
2183+
result in [this.getArg(1), this.getArgByName("url")]
2184+
or
2185+
this.getObject() = instance(result)
2186+
}
2187+
2188+
override string getFramework() { result = "http.client.HTTP[S]Connection" }
2189+
2190+
override predicate disablesCertificateValidation(
2191+
DataFlow::Node disablingNode, DataFlow::Node argumentOrigin
2192+
) {
2193+
// TODO: Proper alerting of insecure verification settings on SSLContext.
2194+
// Because that is not restricted to HTTP[S]Connection usage, we need something
2195+
// more general, and I would like to tackle that in future PR.
2196+
none()
2197+
}
2198+
}
2199+
2200+
/** A call to the `getresponse` method. */
2201+
private class HttpConnectionGetResponseCall extends DataFlow::MethodCallNode,
2202+
HTTPResponse::InstanceSource {
2203+
HttpConnectionGetResponseCall() { this.calls(instance(_), "getresponse") }
2204+
}
2205+
}
2206+
2207+
/**
2208+
* Provides models for the `http.client.HTTPResponse` class
2209+
*
2210+
* See
2211+
* - https://docs.python.org/3.10/library/http.client.html#httpresponse-objects
2212+
* - https://docs.python.org/3/library/http.client.html#http.client.HTTPResponse
2213+
*/
2214+
module HTTPResponse {
2215+
/** Gets a reference to the `http.client.HTTPResponse` class. */
2216+
private API::Node classRef() {
2217+
result = API::moduleImport("http").getMember("client").getMember("HTTPResponse")
2218+
}
2219+
2220+
/**
2221+
* A source of instances of `http.client.HTTPResponse`, extend this class to model new instances.
2222+
*
2223+
* A `http.client.HTTPResponse` is itself a file-like object.
2224+
*
2225+
* This can include instantiations of the class, return values from function
2226+
* calls, or a special parameter that will be set when functions are called by an external
2227+
* library.
2228+
*
2229+
* Use the predicate `HTTPResponse::instance()` to get references to instances of `http.client.HTTPResponse`.
2230+
*/
2231+
abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource,
2232+
DataFlow::LocalSourceNode { }
2233+
2234+
/** A direct instantiation of `http.client.HTTPResponse`. */
2235+
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
2236+
ClassInstantiation() { this = classRef().getACall() }
2237+
}
2238+
2239+
/** Gets a reference to an instance of `http.client.HTTPResponse`. */
2240+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
2241+
t.start() and
2242+
result instanceof InstanceSource
2243+
or
2244+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
2245+
}
2246+
2247+
/** Gets a reference to an instance of `http.client.HTTPResponse`. */
2248+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
2249+
2250+
/**
2251+
* Taint propagation for `http.client.HTTPResponse`.
2252+
*/
2253+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
2254+
InstanceTaintSteps() { this = "http.client.HTTPResponse" }
2255+
2256+
override DataFlow::Node getInstance() { result = instance() }
2257+
2258+
override string getAttributeName() { result in ["headers", "msg", "reason", "url"] }
2259+
2260+
// Method names reported for `http.client.HTTPResponse` instances; presumably consumed by
// `InstanceTaintStepsHelper` to propagate taint through calls to them (confirm in the helper).
override string getMethodName() { result = ["getheader", "getheaders", "info", "geturl"] }
2261+
2262+
override string getAsyncMethodName() { none() }
2263+
}
2264+
2265+
/** An attribute read (`headers`/`msg`) or `info()` call on a response that yields an `HTTPMessage` instance. */
2266+
private class HTTPMessageInstances extends Stdlib::HTTPMessage::InstanceSource {
2267+
HTTPMessageInstances() {
2268+
this.(DataFlow::AttrRead).accesses(instance(), ["headers", "msg"])
2269+
or
2270+
this.(DataFlow::MethodCallNode).calls(instance(), "info")
2271+
}
2272+
}
2273+
}
2274+
20942275
// ---------------------------------------------------------------------------
20952276
// sqlite3
20962277
// ---------------------------------------------------------------------------

python/ql/test/library-tests/frameworks/stdlib/http_client.py

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,34 +11,35 @@
1111

1212

1313
# NOTE: the URL may be relative to host, or may be full URL.
14-
conn = HTTPConnection("example.com") # $ MISSING: clientRequestUrl="example.com"
15-
conn.request("GET", "/") # $ MISSING: clientRequestUrl="/"
16-
conn.request("GET", "http://example.com/") # $ MISSING: clientRequestUrl="http://example.com/"
14+
conn = HTTPConnection("example.com") # $ clientRequestUrl="example.com"
15+
conn.request("GET", "/") # $ clientRequestUrl="/"
16+
url = "http://example.com/"
17+
conn.request("GET", url) # $ clientRequestUrl=url
1718

1819
# kwargs
19-
conn = HTTPConnection(host="example.com") # $ MISSING: clientRequestUrl="example.com"
20-
conn.request(method="GET", url="/") # $ MISSING: clientRequestUrl="/"
20+
conn = HTTPConnection(host="example.com") # $ clientRequestUrl="example.com"
21+
conn.request(method="GET", url="/") # $ clientRequestUrl="/"
2122

2223
# using internal method... you shouldn't but you can
23-
conn._send_request("GET", "url", body=None, headers={}, encode_chunked=False) # $ MISSING: clientRequestUrl="url"
24+
conn._send_request("GET", "url", body=None, headers={}, encode_chunked=False) # $ clientRequestUrl="url"
2425

2526
# low level sending of request
26-
conn.putrequest("GET", "url") # $ MISSING: clientRequestUrl="url"
27+
conn.putrequest("GET", "url") # $ clientRequestUrl="url"
2728
conn.putheader("X-Foo", "value")
2829
conn.endheaders(message_body=None)
2930

3031
# HTTPS
31-
conn = HTTPSConnection("host") # $ MISSING: clientRequestUrl="host"
32-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url"
32+
conn = HTTPSConnection("host") # $ clientRequestUrl="host"
33+
conn.request("GET", "url") # $ clientRequestUrl="url"
3334

3435
# six aliases
3536
import six
3637

37-
conn = six.moves.http_client.HTTPConnection("host") # $ MISSING: clientRequestUrl="host"
38-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url"
38+
conn = six.moves.http_client.HTTPConnection("host") # $ clientRequestUrl="host"
39+
conn.request("GET", "url") # $ clientRequestUrl="url"
3940

40-
conn = six.moves.http_client.HTTPSConnection("host") # $ MISSING: clientRequestUrl="host"
41-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url"
41+
conn = six.moves.http_client.HTTPSConnection("host") # $ clientRequestUrl="host"
42+
conn.request("GET", "url") # $ clientRequestUrl="url"
4243

4344
# ==============================================================================
4445
# Certificate validation disabled
@@ -49,34 +50,34 @@
4950
assert context.check_hostname == True
5051
assert context.verify_mode == ssl.CERT_REQUIRED
5152

52-
conn = HTTPSConnection("host", context=context) # $ MISSING: clientRequestUrl="host"
53-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url"
53+
conn = HTTPSConnection("host", context=context) # $ clientRequestUrl="host"
54+
conn.request("GET", "url") # $ clientRequestUrl="url"
5455

5556
# `_create_default_https_context` is currently just an alias for `create_default_context`
5657
# which creates a context for SERVER_AUTH purpose.
5758
context = ssl.create_default_context()
5859
assert context.check_hostname == True
5960
assert context.verify_mode == ssl.CERT_REQUIRED
6061

61-
conn = HTTPSConnection("host", context=context) # $ MISSING: clientRequestUrl="host"
62-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url"
62+
conn = HTTPSConnection("host", context=context) # $ clientRequestUrl="host"
63+
conn.request("GET", "url") # $ clientRequestUrl="url"
6364

6465
# however, if you supply your own SSLContext, you need to set it manually
6566
context = ssl.SSLContext()
6667
assert context.check_hostname == False
6768
assert context.verify_mode == ssl.CERT_NONE
6869

69-
conn = HTTPSConnection("host", context=context) # $ MISSING: clientRequestUrl="host"
70-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url" clientRequestCertValidationDisabled
70+
conn = HTTPSConnection("host", context=context) # $ clientRequestUrl="host"
71+
conn.request("GET", "url") # $ clientRequestUrl="url" MISSING: clientRequestCertValidationDisabled
7172

7273
# and if you misunderstood whether to use server/client in the purpose, you will also
7374
# get a context without hostname verification.
7475
context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
7576
assert context.check_hostname == False
7677
assert context.verify_mode == ssl.CERT_NONE
7778

78-
conn = HTTPSConnection("host", context=context) # $ MISSING: clientRequestUrl="host"
79-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url" clientRequestCertValidationDisabled
79+
conn = HTTPSConnection("host", context=context) # $ clientRequestUrl="host"
80+
conn.request("GET", "url") # $ clientRequestUrl="url" MISSING: clientRequestCertValidationDisabled
8081

8182
# NOTICE that current documentation says
8283
#
@@ -89,17 +90,17 @@
8990
context.check_hostname = True
9091
assert context.verify_mode == ssl.CERT_REQUIRED
9192

92-
conn = HTTPSConnection("host", context=context) # $ MISSING: clientRequestUrl="host"
93-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url"
93+
conn = HTTPSConnection("host", context=context) # $ clientRequestUrl="host"
94+
conn.request("GET", "url") # $ clientRequestUrl="url"
9495

9596
# only setting verify_mode is not enough, since check_hostname is not enabled
9697

9798
context = ssl.SSLContext()
9899
context.verify_mode = ssl.CERT_REQUIRED
99100
assert context.check_hostname == False
100101

101-
conn = HTTPSConnection("host", context=context) # $ MISSING: clientRequestUrl="host"
102-
conn.request("GET", "url") # $ MISSING: clientRequestUrl="url" clientRequestCertValidationDisabled
102+
conn = HTTPSConnection("host", context=context) # $ clientRequestUrl="host"
103+
conn.request("GET", "url") # $ clientRequestUrl="url" MISSING: clientRequestCertValidationDisabled
103104

104105
# ==============================================================================
105106
# taint test
@@ -111,8 +112,8 @@ def taint_test():
111112
host = request.args['host']
112113
url = request.args['url']
113114

114-
conn = HTTPConnection(host) # $ MISSING: clientRequestUrl=host
115-
conn.request("GET", url) # $ MISSING: clientRequestUrl=url
115+
conn = HTTPConnection(host) # $ clientRequestUrl=host
116+
conn.request("GET", url) # $ clientRequestUrl=url
116117

117118
resp = conn.getresponse()
118119

@@ -122,37 +123,48 @@ def taint_test():
122123
# https://docs.python.org/3/library/http.client.html#http.client.HTTPResponse
123124

124125
# a HTTPResponse itself is file-like
125-
resp, # $ MISSING: tainted
126-
resp.read(), # $ MISSING: tainted
126+
resp, # $ tainted
127+
resp.read(), # $ tainted
127128

128-
resp.getheader("name"), # $ MISSING: tainted
129-
resp.getheaders(), # $ MISSING: tainted
129+
resp.getheader("name"), # $ tainted
130+
resp.getheaders(), # $ tainted
130131

131132
# http.client.HTTPMessage
132-
resp.headers, # $ MISSING: tainted
133-
resp.headers.get_all(), # $ MISSING: tainted
133+
resp.headers, # $ tainted
134+
resp.headers.get_all(), # $ tainted
134135

135136
# Alias for .headers
136137
# http.client.HTTPMessage
137-
resp.msg, # $ MISSING: tainted
138-
resp.msg.get_all(), # $ MISSING: tainted
138+
resp.msg, # $ tainted
139+
resp.msg.get_all(), # $ tainted
139140

140141
# Alias for .headers
141-
resp.info(), # $ MISSING: tainted
142-
resp.info().get_all(), # $ MISSING: tainted
142+
resp.info(), # $ tainted
143+
resp.info().get_all(), # $ tainted
143144

144145
# although this would usually be the textual version of the status
145146
# ("OK" for 200), it is possible to put your own evil data in here.
146-
resp.reason, # $ MISSING: tainted
147+
resp.reason, # $ tainted
147148

148149
# the URL of the resource that was visited, if redirects were followed.
149150
# I don't see any reason this could not contain evil data.
150-
resp.url, # $ MISSING: tainted
151-
resp.geturl(), # $ MISSING: tainted
151+
resp.url, # $ tainted
152+
resp.geturl(), # $ tainted
152153
)
153154

154155
ensure_not_tainted(
155156
resp.status,
156157
resp.code,
157158
resp.getcode(),
158159
)
160+
161+
# check that only setting either host/url is enough to propagate taint
162+
conn = HTTPConnection("host") # $ clientRequestUrl="host"
163+
conn.request("GET", url) # $ clientRequestUrl=url
164+
resp = conn.getresponse()
165+
ensure_tainted(resp) # $ tainted
166+
167+
conn = HTTPConnection(host) # $ clientRequestUrl=host
168+
conn.request("GET", "url") # $ clientRequestUrl="url"
169+
resp = conn.getresponse()
170+
ensure_tainted(resp) # $ tainted

0 commit comments

Comments
 (0)