Skip to content

Commit fb21bc0

Browse files
committed
Python: Add taint-steps for yarl.URL
1 parent 72e6a14 commit fb21bc0

File tree

5 files changed

+159
-38
lines changed

5 files changed

+159
-38
lines changed

docs/codeql/support/reusables/frameworks.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Python built-in support
164164
idna, Utility library
165165
invoke, Utility library
166166
multidict, Utility library
167+
yarl, Utility library
167168
mysql-connector-python, Database
168169
MySQLdb, Database
169170
psycopg2, Database

python/ql/src/semmle/python/Frameworks.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ private import semmle.python.frameworks.Stdlib
2323
private import semmle.python.frameworks.Tornado
2424
private import semmle.python.frameworks.Ujson
2525
private import semmle.python.frameworks.Yaml
26+
private import semmle.python.frameworks.Yarl

python/ql/src/semmle/python/frameworks/Aiohttp.qll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ private import semmle.python.Concepts
1111
private import semmle.python.ApiGraphs
1212
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
1313
private import semmle.python.frameworks.Multidict
14+
private import semmle.python.frameworks.Yarl
1415

1516
/**
1617
* INTERNAL: Do not use.
@@ -248,4 +249,11 @@ module AiohttpWebModel {
248249
this.(DataFlow::AttrRead).getAttributeName() in ["query", "headers"]
249250
}
250251
}
252+
253+
class AiohttpRequestYarlUrlInstances extends Yarl::Url::InstanceSource {
254+
AiohttpRequestYarlUrlInstances() {
255+
this.(DataFlow::AttrRead).getObject() = Request::instance() and
256+
this.(DataFlow::AttrRead).getAttributeName() in ["url", "rel_url"]
257+
}
258+
}
251259
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `yarl` PyPI package.
3+
* See https://yarl.readthedocs.io/en/stable/.
4+
*/
5+
6+
private import python
7+
private import semmle.python.dataflow.new.DataFlow
8+
private import semmle.python.dataflow.new.TaintTracking
9+
private import semmle.python.Concepts
10+
private import semmle.python.ApiGraphs
11+
private import semmle.python.frameworks.Multidict
12+
13+
/**
14+
* INTERNAL: Do not use.
15+
*
16+
* Provides models for the `yarl` PyPI package.
17+
* See https://yarl.readthedocs.io/en/stable/.
18+
*/
19+
module Yarl {
20+
/**
21+
* Provides models for the `yarl.URL` class.
22+
*
23+
* See https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
24+
*/
25+
module Url {
26+
/**
27+
* A source of instances of `yarl.URL`. Extend this class to model new instances.
28+
*
29+
* This can include instantiations of the class, return values from function
30+
* calls, or a special parameter that will be set when functions are called by an external
31+
* library.
32+
*
33+
* Use the `Url::instance()` predicate to get references to instances of `yarl.URL`.
34+
*/
35+
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
36+
37+
/** Gets a reference to an instance of `yarl.URL`. */
38+
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
39+
t.start() and
40+
result instanceof InstanceSource
41+
or
42+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
43+
}
44+
45+
/** Gets a reference to an instance of `yarl.URL`. */
46+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
47+
48+
/**
49+
* Taint propagation for `yarl.URL`.
50+
*
51+
* See https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
52+
*/
53+
class YarlUrlAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
54+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
55+
// Methods
56+
//
57+
// TODO: When we have tools that make it easy, model these properly to handle
58+
// `meth = obj.meth; meth()`. Until then, we'll use this more syntactic approach
59+
// (since it allows us to at least capture the most common cases).
60+
exists(DataFlow::AttrRead attr |
61+
// methods that replace part of the URL, with the replacement given as their arguments
62+
attr.getAttributeName() in [
63+
"with_scheme", "with_user", "with_password", "with_host", "with_port", "with_path",
64+
"with_query", "with_query", "update_query", "update_query", "with_fragment",
65+
"with_name",
66+
// join is a bit different, but is still correct to add here :+1:
67+
"join"
68+
] and
69+
(
70+
// obj -> obj.meth()
71+
nodeFrom = instance() and
72+
attr.getObject() = nodeFrom and
73+
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
74+
or
75+
// argument of obj.meth() -> obj.meth()
76+
attr.getObject() = instance() and
77+
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr and
78+
nodeFrom in [
79+
nodeTo.(DataFlow::CallCfgNode).getArg(_),
80+
nodeTo.(DataFlow::CallCfgNode).getArgByName(_)
81+
]
82+
)
83+
or
84+
// other methods
85+
nodeFrom = instance() and
86+
attr.getObject() = nodeFrom and
87+
attr.getAttributeName() in ["human_repr"] and
88+
nodeTo.(DataFlow::CallCfgNode).getFunction() = attr
89+
)
90+
or
91+
// Attributes
92+
nodeFrom = instance() and
93+
nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom and
94+
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
95+
"user", "raw_user", "password", "raw_password", "host", "raw_host", "port",
96+
"explicit_port", "authority", "raw_authority", "path", "raw_path", "path_qs",
97+
"raw_path_qs", "query_string", "raw_query_string", "fragment", "raw_fragment", "parts",
98+
"raw_parts", "name", "raw_name", "query"
99+
]
100+
}
101+
}
102+
103+
class YarlUrlMultiDictProxyInstance extends Multidict::MultiDictProxy::InstanceSource {
104+
YarlUrlMultiDictProxyInstance() {
105+
this.(DataFlow::AttrRead).getObject() = Yarl::Url::instance() and
106+
this.(DataFlow::AttrRead).getAttributeName() = "query"
107+
}
108+
}
109+
}
110+
}

python/ql/test/library-tests/frameworks/aiohttp/taint_test.py

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -136,59 +136,60 @@ async def test_taint(request: web.Request): # $ requestHandler
136136
import yarl
137137

138138
ensure_tainted(
139-
request.url.user, # $ MISSING: tainted
140-
request.url.raw_user, # $ MISSING: tainted
139+
# see https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
140+
request.url.user, # $ tainted
141+
request.url.raw_user, # $ tainted
141142

142-
request.url.password, # $ MISSING: tainted
143-
request.url.raw_password, # $ MISSING: tainted
143+
request.url.password, # $ tainted
144+
request.url.raw_password, # $ tainted
144145

145-
request.url.host, # $ MISSING: tainted
146-
request.url.raw_host, # $ MISSING: tainted
146+
request.url.host, # $ tainted
147+
request.url.raw_host, # $ tainted
147148

148-
request.url.port, # $ MISSING: tainted
149-
request.url.explicit_port, # $ MISSING: tainted
149+
request.url.port, # $ tainted
150+
request.url.explicit_port, # $ tainted
150151

151-
request.url.authority, # $ MISSING: tainted
152-
request.url.raw_authority, # $ MISSING: tainted
152+
request.url.authority, # $ tainted
153+
request.url.raw_authority, # $ tainted
153154

154-
request.url.path, # $ MISSING: tainted
155-
request.url.raw_path, # $ MISSING: tainted
155+
request.url.path, # $ tainted
156+
request.url.raw_path, # $ tainted
156157

157-
request.url.path_qs, # $ MISSING: tainted
158-
request.url.raw_path_qs, # $ MISSING: tainted
158+
request.url.path_qs, # $ tainted
159+
request.url.raw_path_qs, # $ tainted
159160

160-
request.url.query_string, # $ MISSING: tainted
161-
request.url.raw_query_string, # $ MISSING: tainted
161+
request.url.query_string, # $ tainted
162+
request.url.raw_query_string, # $ tainted
162163

163-
request.url.fragment, # $ MISSING: tainted
164-
request.url.raw_fragment, # $ MISSING: tainted
164+
request.url.fragment, # $ tainted
165+
request.url.raw_fragment, # $ tainted
165166

166-
request.url.parts, # $ MISSING: tainted
167-
request.url.raw_parts, # $ MISSING: tainted
167+
request.url.parts, # $ tainted
168+
request.url.raw_parts, # $ tainted
168169

169-
request.url.name, # $ MISSING: tainted
170-
request.url.raw_name, # $ MISSING: tainted
170+
request.url.name, # $ tainted
171+
request.url.raw_name, # $ tainted
171172

172173
# multidict.MultiDictProxy[str]
173-
request.url.query, # $ MISSING: tainted
174-
request.url.query.getone("key"), # $ MISSING: tainted
175-
176-
request.url.with_scheme("foo"), # $ MISSING: tainted
177-
request.url.with_user("foo"), # $ MISSING: tainted
178-
request.url.with_password("foo"), # $ MISSING: tainted
179-
request.url.with_host("foo"), # $ MISSING: tainted
180-
request.url.with_port("foo"), # $ MISSING: tainted
181-
request.url.with_path("foo"), # $ MISSING: tainted
182-
request.url.with_query({"foo": 42}), # $ MISSING: tainted
183-
request.url.with_query(foo=42), # $ MISSING: tainted
184-
request.url.update_query({"foo": 42}), # $ MISSING: tainted
185-
request.url.update_query(foo=42), # $ MISSING: tainted
186-
request.url.with_fragment("foo"), # $ MISSING: tainted
187-
request.url.with_name("foo"), # $ MISSING: tainted
174+
request.url.query, # $ tainted
175+
request.url.query.getone("key"), # $ tainted
176+
177+
request.url.with_scheme("foo"), # $ tainted
178+
request.url.with_user("foo"), # $ tainted
179+
request.url.with_password("foo"), # $ tainted
180+
request.url.with_host("foo"), # $ tainted
181+
request.url.with_port("foo"), # $ tainted
182+
request.url.with_path("foo"), # $ tainted
183+
request.url.with_query({"foo": 42}), # $ tainted
184+
request.url.with_query(foo=42), # $ tainted
185+
request.url.update_query({"foo": 42}), # $ tainted
186+
request.url.update_query(foo=42), # $ tainted
187+
request.url.with_fragment("foo"), # $ tainted
188+
request.url.with_name("foo"), # $ tainted
188189

189190
request.url.join(yarl.URL("wat.html")), # $ tainted
190191

191-
request.url.human_repr(), # $ MISSING: tainted
192+
request.url.human_repr(), # $ tainted
192193
)
193194

194195

0 commit comments

Comments
 (0)