Skip to content

Commit e1c47f5

Browse files
committed
Python: Reorganize taint tests of re
Mostly to highlight that with flow-summary modeling, we don't expect taint for a lot of these. I also opted to make `finditer()` tainted for consistency.
1 parent ffc27b5 commit e1c47f5

File tree

2 files changed

+46
-42
lines changed

2 files changed

+46
-42
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3225,8 +3225,9 @@ private module StdlibPrivate {
32253225
methodName in ["split", "findall", "finditer"] and
32263226
output = "ReturnValue.ListElement"
32273227
or
3228-
// TODO: Since we currently model lists as tainted, the result of findall and split needs to be tainted
3229-
methodName in ["split", "findall"] and
3228+
// TODO: Since we currently model iterables as tainted when their elements
3229+
// are, the result of findall, finditer, split needs to be tainted
3230+
methodName in ["split", "findall", "finditer"] and
32303231
output = "ReturnValue"
32313232
or
32323233
methodName = "sub" and

python/ql/test/library-tests/frameworks/stdlib/test_re.py

Lines changed: 43 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,47 +6,16 @@
66
compiled_pat = re.compile(pat)
77

88
# see https://docs.python.org/3/library/re.html#functions
9-
ensure_tainted(
10-
# returns Match object, see below
11-
re.search(pat, ts), # $ MISSING: tainted
12-
re.match(pat, ts), # $ MISSING: tainted
13-
re.fullmatch(pat, ts), # $ MISSING: tainted
14-
15-
# other functions not returning Match objects
16-
re.split(pat, ts), # $ tainted
17-
re.split(pat, ts)[0], # $ tainted
18-
19-
re.findall(pat, ts), # $ tainted
20-
re.findall(pat, ts)[0], # $ tainted
21-
22-
re.finditer(pat, ts), # $ MISSING: tainted
23-
[x for x in re.finditer(pat, ts)], # $ tainted
24-
25-
re.sub(pat, repl="safe", string=ts), # $ tainted
26-
re.sub(pat, repl=lambda m: ..., string=ts), # $ tainted
27-
re.sub(pat, repl=ts, string="safe"), # $ tainted
28-
re.sub(pat, repl=lambda m: ts, string="safe"), # $ tainted
29-
30-
re.subn(pat, repl="safe", string=ts), # $ MISSING: tainted
31-
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
32-
33-
# same for compiled patterns
34-
compiled_pat.search(ts), # $ MISSING: tainted
35-
compiled_pat.match(ts), # $ MISSING: tainted
36-
compiled_pat.fullmatch(ts), # $ MISSING: tainted
37-
38-
compiled_pat.split(ts), # $ tainted
39-
compiled_pat.split(ts)[0], # $ tainted
40-
41-
# ...
42-
43-
# user-controlled compiled pattern
44-
re.compile(ts), # $ tainted
45-
re.compile(ts).pattern, # $ tainted
46-
)
47-
489
ensure_not_tainted(
49-
re.subn(pat, repl="safe", string=ts)[1], # // the number of substitutions made
10+
# returns Match object, which is tested properly below. (note: with the flow summary
11+
# modeling, objects containing tainted values are not themselves tainted).
12+
re.search(pat, ts),
13+
re.match(pat, ts),
14+
re.fullmatch(pat, ts),
15+
16+
compiled_pat.search(ts),
17+
compiled_pat.match(ts),
18+
compiled_pat.fullmatch(ts),
5019
)
5120

5221
# Match object
@@ -81,3 +50,37 @@
8150
re.match(pat, "safe").re,
8251
re.match(pat, "safe").string,
8352
)
53+
54+
ensure_tainted(
55+
# other functions not returning Match objects
56+
re.split(pat, ts), # $ tainted
57+
re.split(pat, ts)[0], # $ tainted
58+
59+
re.findall(pat, ts), # $ tainted
60+
re.findall(pat, ts)[0], # $ tainted
61+
62+
re.finditer(pat, ts), # $ tainted
63+
[x for x in re.finditer(pat, ts)], # $ tainted
64+
65+
re.sub(pat, repl="safe", string=ts), # $ tainted
66+
re.sub(pat, repl=lambda m: ..., string=ts), # $ tainted
67+
re.sub(pat, repl=ts, string="safe"), # $ tainted
68+
re.sub(pat, repl=lambda m: ts, string="safe"), # $ tainted
69+
70+
# same for compiled patterns
71+
compiled_pat.split(ts), # $ tainted
72+
compiled_pat.split(ts)[0], # $ tainted
73+
# ...
74+
75+
# user-controlled compiled pattern
76+
re.compile(ts), # $ tainted
77+
re.compile(ts).pattern, # $ tainted
78+
)
79+
80+
ensure_not_tainted(
81+
re.subn(pat, repl="safe", string=ts),
82+
re.subn(pat, repl="safe", string=ts)[1], # // the number of substitutions made
83+
)
84+
ensure_tainted(
85+
re.subn(pat, repl="safe", string=ts)[0], # $ tainted // the string
86+
)

0 commit comments

Comments
 (0)