Skip to content

Commit 2e851cd

Browse files
committed
Python: Improve yarl.URL modeling
1 parent 9372e3b commit 2e851cd

File tree

8 files changed

+85
-64
lines changed

8 files changed

+85
-64
lines changed

python/ql/src/semmle/python/frameworks/Yarl.qll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ module Yarl {
3434
*/
3535
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
3636

37+
/** A direct instantiation of `yarl.URL`, that is, a call to the `URL` member of the imported `yarl` module. */
38+
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
39+
ClassInstantiation() { this = API::moduleImport("yarl").getMember("URL").getACall() }
40+
}
41+
3742
/** Gets a reference to an instance of `yarl.URL`. */
3843
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
3944
t.start() and
@@ -52,6 +57,12 @@ module Yarl {
5257
*/
5358
class YarlUrlAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
5459
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
60+
// class instantiation
61+
exists(ClassInstantiation call |
62+
nodeFrom in [call.getArg(0), call.getArgByName("val")] and
63+
nodeTo = call
64+
)
65+
or
5566
// Methods
5667
//
5768
// TODO: When we have tools that make it easy, model these properly to handle

python/ql/test/library-tests/frameworks/aiohttp/taint_test.py

Lines changed: 3 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@ async def test_taint(request: web.Request): # $ requestHandler
55
ensure_tainted(
66
request, # $ tainted
77

8-
# yarl.URL instances
8+
# yarl.URL instances; the detailed taint tests live under the `yarl` framework tests
99
# https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
10-
# see below
1110
request.url, # $ tainted
11+
request.url.human_repr(), # $ tainted
1212
request.rel_url, # $ tainted
13+
request.rel_url.human_repr(), # $ tainted
1314

1415
request.forwarded, # $ tainted
1516

@@ -130,68 +131,6 @@ async def test_taint(request: web.Request): # $ requestHandler
130131
request.config_dict,
131132
)
132133

133-
# TODO: Should have a better way to capture that we in fact _do_ model this as
134-
# an instance of the right class, and have the actual taint_test for that in a
135-
# different file!
136-
import yarl
137-
138-
ensure_tainted(
139-
# see https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
140-
request.url.user, # $ tainted
141-
request.url.raw_user, # $ tainted
142-
143-
request.url.password, # $ tainted
144-
request.url.raw_password, # $ tainted
145-
146-
request.url.host, # $ tainted
147-
request.url.raw_host, # $ tainted
148-
149-
request.url.port, # $ tainted
150-
request.url.explicit_port, # $ tainted
151-
152-
request.url.authority, # $ tainted
153-
request.url.raw_authority, # $ tainted
154-
155-
request.url.path, # $ tainted
156-
request.url.raw_path, # $ tainted
157-
158-
request.url.path_qs, # $ tainted
159-
request.url.raw_path_qs, # $ tainted
160-
161-
request.url.query_string, # $ tainted
162-
request.url.raw_query_string, # $ tainted
163-
164-
request.url.fragment, # $ tainted
165-
request.url.raw_fragment, # $ tainted
166-
167-
request.url.parts, # $ tainted
168-
request.url.raw_parts, # $ tainted
169-
170-
request.url.name, # $ tainted
171-
request.url.raw_name, # $ tainted
172-
173-
# multidict.MultiDictProxy[str]
174-
request.url.query, # $ tainted
175-
request.url.query.getone("key"), # $ tainted
176-
177-
request.url.with_scheme("foo"), # $ tainted
178-
request.url.with_user("foo"), # $ tainted
179-
request.url.with_password("foo"), # $ tainted
180-
request.url.with_host("foo"), # $ tainted
181-
request.url.with_port("foo"), # $ tainted
182-
request.url.with_path("foo"), # $ tainted
183-
request.url.with_query({"foo": 42}), # $ tainted
184-
request.url.with_query(foo=42), # $ tainted
185-
request.url.update_query({"foo": 42}), # $ tainted
186-
request.url.update_query(foo=42), # $ tainted
187-
request.url.with_fragment("foo"), # $ tainted
188-
request.url.with_name("foo"), # $ tainted
189-
190-
request.url.join(yarl.URL("wat.html")), # $ tainted
191-
192-
request.url.human_repr(), # $ tainted
193-
)
194-
195134

196135
class TaintTestClass(web.View):
197136
def get(self): # $ requestHandler

python/ql/test/library-tests/frameworks/yarl/ConceptsTest.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import python
2+
import experimental.meta.ConceptsTest
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
argumentToEnsureNotTaintedNotMarkedAsSpurious
2+
untaintedArgumentToEnsureTaintedNotMarkedAsMissing
3+
failures
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import experimental.meta.InlineTaintTest
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=1 --lang=3
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import yarl
2+
3+
4+
url = yarl.URL(TAINTED_STRING)  # the checks below expect this URL object to carry taint
5+
6+
7+
ensure_tainted(
8+
url, # $ tainted
9+
10+
# URL properties and methods, see https://yarl.readthedocs.io/en/stable/api.html#yarl.URL
11+
url.user, # $ tainted
12+
url.raw_user, # $ tainted
13+
14+
url.password, # $ tainted
15+
url.raw_password, # $ tainted
16+
17+
url.host, # $ tainted
18+
url.raw_host, # $ tainted
19+
20+
url.port, # $ tainted
21+
url.explicit_port, # $ tainted
22+
23+
url.authority, # $ tainted
24+
url.raw_authority, # $ tainted
25+
26+
url.path, # $ tainted
27+
url.raw_path, # $ tainted
28+
29+
url.path_qs, # $ tainted
30+
url.raw_path_qs, # $ tainted
31+
32+
url.query_string, # $ tainted
33+
url.raw_query_string, # $ tainted
34+
35+
url.fragment, # $ tainted
36+
url.raw_fragment, # $ tainted
37+
38+
url.parts, # $ tainted
39+
url.raw_parts, # $ tainted
40+
41+
url.name, # $ tainted
42+
url.raw_name, # $ tainted
43+
44+
# url.query is a multidict.MultiDictProxy[str]
45+
url.query, # $ tainted
46+
url.query.getone("key"), # $ tainted
47+
48+
url.with_scheme("foo"), # $ tainted
49+
url.with_user("foo"), # $ tainted
50+
url.with_password("foo"), # $ tainted
51+
url.with_host("foo"), # $ tainted
52+
url.with_port("foo"), # $ tainted
53+
url.with_path("foo"), # $ tainted
54+
url.with_query({"foo": 42}), # $ tainted
55+
url.with_query(foo=42), # $ tainted
56+
url.update_query({"foo": 42}), # $ tainted
57+
url.update_query(foo=42), # $ tainted
58+
url.with_fragment("foo"), # $ tainted
59+
url.with_name("foo"), # $ tainted
60+
61+
url.join(yarl.URL("wat.html")), # $ tainted
62+
63+
url.human_repr(), # $ tainted
64+
)

0 commit comments

Comments
 (0)