Skip to content

Commit 0ac4a10

Browse files
committed
Python: model that finditer returns iterable of re.Match objects
1 parent 494b8bd commit 0ac4a10

File tree

2 files changed

+34
-22
lines changed

2 files changed

+34
-22
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3284,6 +3284,18 @@ module StdlibPrivate {
32843284
}
32853285
}
32863286

3287+
/**
3288+
* A base API node for regular expression functions.
3289+
* Either the `re` module (`compiled = false`) or a compiled regular expression (`compiled = true`).
3290+
*/
3291+
private API::Node re(boolean compiled) {
3292+
result = API::moduleImport("re") and
3293+
compiled = false
3294+
or
3295+
result = any(RePatternSummary c).getACall().(API::CallNode).getReturn() and
3296+
compiled = true
3297+
}
3298+
32873299
/**
32883300
* A flow summary for methods returning a `re.Match` object
32893301
*
@@ -3293,17 +3305,18 @@ module StdlibPrivate {
32933305
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
32943306

32953307
override DataFlow::CallCfgNode getACall() {
3296-
this = "re.Match" and
3297-
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
3298-
or
3299-
this = "compiled re.Match" and
3300-
result =
3301-
any(RePatternSummary c)
3302-
.getACall()
3303-
.(API::CallNode)
3304-
.getReturn()
3305-
.getMember(["match", "search", "fullmatch"])
3306-
.getACall()
3308+
exists(API::Node re, boolean compiled |
3309+
re = re(compiled) and
3310+
(
3311+
compiled = false and
3312+
this = "re.Match"
3313+
or
3314+
compiled = true and
3315+
this = "compiled re.Match"
3316+
)
3317+
|
3318+
result = re.getMember(["match", "search", "fullmatch"]).getACall()
3319+
)
33073320
}
33083321

33093322
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3340,6 +3353,13 @@ module StdlibPrivate {
33403353
}
33413354
}
33423355

3356+
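/** An API node for a `re.Match` object: the result of a call modeled by `ReMatchSummary`, or an element of the iterable returned by `finditer`. */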
3357+
private API::Node match() {
3358+
result = any(ReMatchSummary c).getACall().(API::CallNode).getReturn()
3359+
or
3360+
result = re(_).getMember("finditer").getReturn().getASubscript()
3361+
}
3362+
33433363
/**
33443364
* A flow summary for methods on a `re.Match` object
33453365
*
@@ -3353,15 +3373,7 @@ module StdlibPrivate {
33533373
methodName in ["expand", "group", "groups", "groupdict"]
33543374
}
33553375

3356-
override DataFlow::CallCfgNode getACall() {
3357-
result =
3358-
any(ReMatchSummary c)
3359-
.getACall()
3360-
.(API::CallNode)
3361-
.getReturn()
3362-
.getMember(methodName)
3363-
.getACall()
3364-
}
3376+
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }
33653377

33663378
override DataFlow::ArgumentNode getACallback() { none() }
33673379

python/ql/test/library-tests/frameworks/stdlib/test_re.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@
4242
list(re.finditer(pat, ts))[0].string, # $ tainted
4343
[m.string for m in re.finditer(pat, ts)], # $ tainted
4444

45-
list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted
46-
[m.groups()[0] for m in re.finditer(pat, ts)], # $ MISSING: tainted
45+
list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted // this requires list content in type tracking
46+
[m.groups()[0] for m in re.finditer(pat, ts)], # $ tainted
4747
)
4848
ensure_not_tainted(
4949
safe_match.expand("Hello \1"),

0 commit comments

Comments
 (0)