Skip to content

Commit d388dd5

Browse files
committed
Python: Model HTTPMessage from Stdlib
1 parent f3ce393 commit d388dd5

File tree

2 files changed

+61
-4
lines changed

2 files changed

+61
-4
lines changed

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,56 @@ module Stdlib {
6565
}
6666
}
6767
}
68+
69+
/**
70+
* Provides models for the `http.client.HTTPMessage` class.
71+
*
72+
* Has no official docs, but see
73+
* https://github.com/python/cpython/blob/64f54b7ccd49764b0304e076bfd79b5482988f53/Lib/http/client.py#L175
74+
* and https://docs.python.org/3.9/library/email.compat32-message.html#email.message.Message
75+
*/
76+
module HTTPMessage {
77+
/**
78+
* A source of instances of `http.client.HTTPMessage`. Extend this class to model new instances.
79+
*
80+
* This can include instantiations of the class, return values from function
81+
* calls, or a special parameter that will be set when functions are called by an external
82+
* library.
83+
*
84+
* Use the predicate `HTTPMessage::instance()` to get references to instances of `http.client.HTTPMessage`.
85+
*/
86+
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
87+
88+
/** Gets a reference to an instance of `http.client.HTTPMessage`, as tracked by the type tracker `t`. */
89+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
90+
t.start() and
91+
result instanceof InstanceSource
92+
or
93+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
94+
}
95+
96+
/** Gets a reference to an instance of `http.client.HTTPMessage`. */
97+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
98+
99+
/**
100+
* Taint propagation for `http.client.HTTPMessage`: from an instance to the result of calling one of the methods listed below on it.
101+
*/
102+
private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
103+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
104+
// Methods
105+
//
106+
// TODO: When we have tools that make it easy, model these properly to handle
107+
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
108+
// (since it allows us to at least capture the most common cases).
109+
nodeFrom = instance() and
110+
exists(DataFlow::AttrRead attr | attr.getObject() = nodeFrom |
111+
// normal (non-async) methods: the call result is tainted when the instance is
112+
attr.getAttributeName() in ["get_all", "as_bytes", "as_string", "keys"] and
113+
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
114+
)
115+
}
116+
}
117+
}
68118
}
69119

70120
/**
@@ -974,6 +1024,13 @@ private module StdlibPrivate {
9741024
}
9751025
}
9761026

1027+
/** An `HTTPMessage` instance that originates from reading the `headers` attribute of a `BaseHTTPRequestHandler` instance. */
1028+
private class BaseHTTPRequestHandlerHeadersInstances extends Stdlib::HTTPMessage::InstanceSource {
1029+
BaseHTTPRequestHandlerHeadersInstances() {
1030+
this.(DataFlow::AttrRead).accesses(instance(), "headers")
1031+
}
1032+
}
1033+
9771034
/**
9781035
* The entry-point for handling a request with a `BaseHTTPRequestHandler` subclass.
9791036
*

python/ql/test/library-tests/frameworks/stdlib/http_server.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@ def taint_sources(self):
5858
self.headers, # $ tainted
5959
self.headers['Foo'], # $ tainted
6060
self.headers.get('Foo'), # $ tainted
61-
self.headers.get_all('Foo'), # $ MISSING: tainted
62-
self.headers.keys(), # $ MISSING: tainted
61+
self.headers.get_all('Foo'), # $ tainted
62+
self.headers.keys(), # $ tainted
6363
self.headers.values(), # $ tainted
6464
self.headers.items(), # $ tainted
65-
self.headers.as_bytes(), # $ MISSING: tainted
66-
self.headers.as_string(), # $ MISSING: tainted
65+
self.headers.as_bytes(), # $ tainted
66+
self.headers.as_string(), # $ tainted
6767
str(self.headers), # $ tainted
6868
bytes(self.headers), # $ tainted
6969

0 commit comments

Comments
 (0)