Skip to content

Commit 2af60f1

Browse files
authored
Merge pull request github#17712 from yoff/python/re-finditer-match
Python: model that `re.finditer` returns an iterable of `re.Match` objects
2 parents ac8b973 + 6bd4614 commit 2af60f1

File tree

3 files changed

+50
-20
lines changed

3 files changed

+50
-20
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Modelled that `re.finditer` returns an iterable of `re.Match` objects. This is now understood by the API graph in many cases.

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3284,6 +3284,18 @@ module StdlibPrivate {
32843284
}
32853285
}
32863286

3287+
/**
3288+
* A base API node for regular expression functions.
3289+
* Either the `re` module (`compiled = false`) or a compiled regular expression (`compiled = true`).
3290+
*/
3291+
private API::Node re(boolean compiled) {
3292+
result = API::moduleImport("re") and
3293+
compiled = false
3294+
or
3295+
result = any(RePatternSummary c).getACall().(API::CallNode).getReturn() and
3296+
compiled = true
3297+
}
3298+
32873299
/**
32883300
* A flow summary for methods returning a `re.Match` object
32893301
*
@@ -3293,17 +3305,18 @@ module StdlibPrivate {
32933305
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
32943306

32953307
override DataFlow::CallCfgNode getACall() {
3296-
this = "re.Match" and
3297-
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
3298-
or
3299-
this = "compiled re.Match" and
3300-
result =
3301-
any(RePatternSummary c)
3302-
.getACall()
3303-
.(API::CallNode)
3304-
.getReturn()
3305-
.getMember(["match", "search", "fullmatch"])
3306-
.getACall()
3308+
exists(API::Node re, boolean compiled |
3309+
re = re(compiled) and
3310+
(
3311+
compiled = false and
3312+
this = "re.Match"
3313+
or
3314+
compiled = true and
3315+
this = "compiled re.Match"
3316+
)
3317+
|
3318+
result = re.getMember(["match", "search", "fullmatch"]).getACall()
3319+
)
33073320
}
33083321

33093322
override DataFlow::ArgumentNode getACallback() { none() }
@@ -3340,6 +3353,13 @@ module StdlibPrivate {
33403353
}
33413354
}
33423355

3356+
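/** Gets an API node for a `re.Match` object: either the result of a call modelled by `ReMatchSummary`, or an element of the iterable returned by `finditer`. */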
3357+
private API::Node match() {
3358+
result = any(ReMatchSummary c).getACall().(API::CallNode).getReturn()
3359+
or
3360+
result = re(_).getMember("finditer").getReturn().getASubscript()
3361+
}
3362+
33433363
/**
33443364
* A flow summary for methods on a `re.Match` object
33453365
*
@@ -3353,15 +3373,7 @@ module StdlibPrivate {
33533373
methodName in ["expand", "group", "groups", "groupdict"]
33543374
}
33553375

3356-
override DataFlow::CallCfgNode getACall() {
3357-
result =
3358-
any(ReMatchSummary c)
3359-
.getACall()
3360-
.(API::CallNode)
3361-
.getReturn()
3362-
.getMember(methodName)
3363-
.getACall()
3364-
}
3376+
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }
33653377

33663378
override DataFlow::ArgumentNode getACallback() { none() }
33673379

@@ -3463,6 +3475,14 @@ module StdlibPrivate {
34633475
) and
34643476
preservesValue = false
34653477
)
3478+
or
3479+
// flow from the input string to the `string` attribute of each match object returned by `finditer`
3480+
exists(int arg | arg = methodName.(RegexExecutionMethod).getStringArgIndex() - offset |
3481+
input in ["Argument[" + arg + "]", "Argument[string:]"] and
3482+
methodName = "finditer" and
3483+
output = "ReturnValue.ListElement.Attribute[string]" and
3484+
preservesValue = true
3485+
)
34663486
)
34673487
}
34683488
}

python/ql/test/library-tests/frameworks/stdlib/test_re.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@
3838

3939
compiled_pat.match(ts).string, # $ tainted
4040
re.compile(ts).match("safe").re.pattern, # $ tainted
41+
42+
list(re.finditer(pat, ts))[0].string, # $ tainted
43+
[m.string for m in re.finditer(pat, ts)], # $ tainted
44+
45+
list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted // this requires list content in type tracking
46+
[m.groups()[0] for m in re.finditer(pat, ts)], # $ tainted
4147
)
4248
ensure_not_tainted(
4349
safe_match.expand("Hello \1"),

0 commit comments

Comments
 (0)