Skip to content

Commit 851c30e

Browse files
committed
Python: Add taint modeling of re.Match objects
1 parent ea4761d commit 851c30e

File tree

2 files changed

+145
-13
lines changed

2 files changed

+145
-13
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3026,6 +3026,137 @@ private module StdlibPrivate {
30263026
override string getName() { result = "re." + method }
30273027
}
30283028

3029+
/**
3030+
* A flow summary for compiled regex objects, i.e. the value returned by a call to `re.compile`. The pattern argument (positional or `pattern=` keyword) is stored, value-preserving, in the `pattern` attribute of the returned object.
3031+
*
3032+
* See https://docs.python.org/3.11/library/re.html#re-objects
3033+
*/
3034+
class RePatternSummary extends SummarizedCallable {
3035+
RePatternSummary() { this = "re.Pattern" }
3036+
3037+
override DataFlow::CallCfgNode getACall() {
3038+
result = API::moduleImport("re").getMember("compile").getACall()
3039+
}
3040+
3041+
override DataFlow::ArgumentNode getACallback() {
3042+
result = API::moduleImport("re").getMember("compile").getAValueReachableFromSource()
3043+
}
3044+
3045+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3046+
input in ["Argument[0]", "Argument[pattern:]"] and
3047+
output = "ReturnValue.Attribute[pattern]" and
3048+
preservesValue = true
3049+
}
3050+
}
3051+
3052+
/**
3053+
* A flow summary for methods returning a `re.Match` object, i.e. `match`, `search` and `fullmatch`, called either on the `re` module ("re.Match") or on the result of `re.compile` ("compiled re.Match").
3054+
*
3055+
* See https://docs.python.org/3/library/re.html#re.Match
3056+
*/
3057+
class ReMatchSummary extends SummarizedCallable {
3058+
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }
3059+
3060+
override DataFlow::CallCfgNode getACall() {
3061+
this = "re.Match" and
3062+
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
3063+
or
3064+
this = "compiled re.Match" and
3065+
result =
3066+
any(RePatternSummary c)
3067+
.getACall()
3068+
.(API::CallNode)
3069+
.getReturn()
3070+
.getMember(["match", "search", "fullmatch"])
3071+
.getACall()
3072+
}
3073+
3074+
override DataFlow::ArgumentNode getACallback() { none() }
3075+
3076+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3077+
exists(string arg |
3078+
this = "re.Match" and arg = "Argument[1]"
3079+
or
3080+
this = "compiled re.Match" and arg = "Argument[0]"
3081+
|
3082+
input in [arg, "Argument[string:]"] and
3083+
(
3084+
output = "ReturnValue.Attribute[string]" and
3085+
preservesValue = true
3086+
or
3087+
// Indexing such as `match[g]` is the same as `match.group(g)`.
3088+
// Since you can index with both integers and strings, we model it as
3089+
// both a list element and a dictionary element. This is a bit of a hack, but there is no way to model
3090+
// subscript operators directly with flow summaries.
3091+
output in ["ReturnValue.ListElement", "ReturnValue.DictionaryElementAny"] and
3092+
preservesValue = false
3093+
)
3094+
)
3095+
or
3096+
// the regex pattern is exposed on the returned match as `.re.pattern`
3097+
(
3098+
this = "re.Match" and input in ["Argument[0]", "Argument[pattern:]"]
3099+
or
3100+
// for compiled regexes, the pattern is already stored in the `pattern` attribute of the receiver
3101+
this = "compiled re.Match" and input = "Argument[self].Attribute[pattern]"
3102+
) and
3103+
output = "ReturnValue.Attribute[re].Attribute[pattern]" and
3104+
preservesValue = true
3105+
}
3106+
}
3107+
3108+
/**
3109+
* A flow summary for the methods `expand`, `group`, `groups` and `groupdict` on a `re.Match` object. All flow modeled here is taint flow (`preservesValue = false`) from the `string` attribute of the match (and, for `expand`, also from its first argument) to the return value or its elements.
3110+
*
3111+
* See https://docs.python.org/3/library/re.html#re.Match
3112+
*/
3113+
class ReMatchMethodsSummary extends SummarizedCallable {
3114+
string methodName;
3115+
3116+
ReMatchMethodsSummary() {
3117+
this = "re.Match." + methodName and
3118+
methodName in ["expand", "group", "groups", "groupdict"]
3119+
}
3120+
3121+
override DataFlow::CallCfgNode getACall() {
3122+
result =
3123+
any(ReMatchSummary c)
3124+
.getACall()
3125+
.(API::CallNode)
3126+
.getReturn()
3127+
.getMember(methodName)
3128+
.getACall()
3129+
}
3130+
3131+
override DataFlow::ArgumentNode getACallback() { none() }
3132+
3133+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3134+
methodName = "expand" and
3135+
preservesValue = false and
3136+
(
3137+
input = "Argument[0]" and output = "ReturnValue"
3138+
or
3139+
input = "Argument[self].Attribute[string]" and
3140+
output = "ReturnValue"
3141+
)
3142+
or
3143+
methodName = "group" and
3144+
input = "Argument[self].Attribute[string]" and
3145+
output in ["ReturnValue", "ReturnValue.ListElement"] and
3146+
preservesValue = false
3147+
or
3148+
methodName = "groups" and
3149+
input = "Argument[self].Attribute[string]" and
3150+
output = "ReturnValue.ListElement" and
3151+
preservesValue = false
3152+
or
3153+
methodName = "groupdict" and
3154+
input = "Argument[self].Attribute[string]" and
3155+
output = "ReturnValue.DictionaryElementAny" and
3156+
preservesValue = false
3157+
}
3158+
}
3159+
30293160
/**
30303161
* A call to 're.escape'.
30313162
* See https://docs.python.org/3/library/re.html#re.escape

python/ql/test/library-tests/frameworks/stdlib/test_re.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242
# user-controlled compiled pattern
4343
re.compile(ts), # $ tainted
44-
re.compile(ts).pattern, # $ MISSING: tainted
44+
re.compile(ts).pattern, # $ tainted
4545
)
4646

4747
ensure_not_tainted(
@@ -52,25 +52,26 @@
5252
tainted_match = re.match(pat, ts)
5353
safe_match = re.match(pat, "safe")
5454
ensure_tainted(
55-
tainted_match.expand("Hello \1"), # $ MISSING: tainted
56-
safe_match.expand(ts), # $ MISSING: tainted
57-
tainted_match.group(), # $ MISSING: tainted
58-
tainted_match.group(1, 2), # $ MISSING: tainted
59-
tainted_match.group(1, 2)[0], # $ MISSING: tainted
60-
tainted_match[0], # $ MISSING: tainted
55+
tainted_match.expand("Hello \1"), # $ tainted
56+
safe_match.expand(ts), # $ tainted
57+
tainted_match.group(), # $ tainted
58+
tainted_match.group(1, 2), # $ tainted
59+
tainted_match.group(1, 2)[0], # $ tainted
60+
tainted_match[0], # $ tainted
61+
tainted_match["key"], # $ tainted
6162

6263
tainted_match.groups(), # $ MISSING: tainted
63-
tainted_match.groups()[0], # $ MISSING: tainted
64+
tainted_match.groups()[0], # $ tainted
6465
tainted_match.groupdict(), # $ MISSING: tainted
65-
tainted_match.groupdict()["key"], # $ MISSING: tainted
66+
tainted_match.groupdict()["key"], # $ tainted
6667

67-
re.match(pat, ts).string, # $ MISSING: tainted
68+
re.match(pat, ts).string, # $ tainted
6869
re.match(ts, "safe").re, # $ MISSING: tainted
69-
re.match(ts, "safe").re.pattern, # $ MISSING: tainted
70+
re.match(ts, "safe").re.pattern, # $ tainted
7071

71-
compiled_pat.match(ts).string, # $ MISSING: tainted
72+
compiled_pat.match(ts).string, # $ tainted
7273
re.compile(ts).match("safe").re, # $ MISSING: tainted
73-
re.compile(ts).match("safe").re.pattern, # $ MISSING: tainted
74+
re.compile(ts).match("safe").re.pattern, # $ tainted
7475
)
7576
ensure_not_tainted(
7677
safe_match.expand("Hello \1"),

0 commit comments

Comments
 (0)