Skip to content

Commit 3a285f5

Browse files
committed
Convert regex-use sinks to use MaD
1 parent a5a999f commit 3a285f5

File tree

5 files changed

+87
-37
lines changed

5 files changed

+87
-37
lines changed

go/ql/lib/ext/regexp.model.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,18 @@
11
extensions:
2+
- addsTo:
3+
pack: codeql/go-all
4+
extensible: sinkModel
5+
data:
6+
- ["regexp", "", True, "Compile", "", "", "Argument[0]", "regex-use[c]", "manual"]
7+
- ["regexp", "", True, "CompilePOSIX", "", "", "Argument[0]", "regex-use[c]", "manual"]
8+
- ["regexp", "", True, "MustCompile", "", "", "Argument[0]", "regex-use[c]", "manual"]
9+
- ["regexp", "", True, "MustCompilePOSIX", "", "", "Argument[0]", "regex-use[c]", "manual"]
10+
- ["regexp", "", True, "Match", "", "", "Argument[0]", "regex-use[1]", "manual"]
11+
- ["regexp", "", True, "MatchReader", "", "", "Argument[0]", "regex-use[1]", "manual"]
12+
- ["regexp", "", True, "MatchString", "", "", "Argument[0]", "regex-use[1]", "manual"]
13+
- ["regexp", "Regexp", True, "Match", "", "", "Argument[receiver]", "regex-use[0]", "manual"]
14+
- ["regexp", "Regexp", True, "MatchReader", "", "", "Argument[receiver]", "regex-use[0]", "manual"]
15+
- ["regexp", "Regexp", True, "MatchString", "", "", "Argument[receiver]", "regex-use[0]", "manual"]
216
- addsTo:
317
pack: codeql/go-all
418
extensible: summaryModel

go/ql/lib/semmle/go/frameworks/stdlib/Regexp.qll

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,43 @@ import go
66

77
/** Provides models of commonly used functions in the `regexp` package. */
88
module Regexp {
9-
private class Pattern extends RegexpPattern::Range, DataFlow::ArgumentNode {
10-
string fnName;
9+
/**
10+
* Holds if `kind` is an external sink kind that is relevant for regex flow.
11+
* `strArg` is the index of the argument to methods with this sink kind that
12+
* contain the string to be matched against, where -1 is the qualifier; or -2
13+
* if no such argument exists and the function compiles the regex; or -3 if
14+
* no such argument exists and the function does not compile the regex.
15+
*
16+
* So `regex-use[0]` indicates that argument 0 contains the string to matched
17+
* against, `regex-use[c]` indicates that there is no string to be matched
18+
* against and the return value of the function is a compiled regex, and
19+
* `regex-use` means that there is no string to be matched against and the
20+
* function does not compile the regex.
21+
*/
22+
private predicate regexSinkKindInfo(string kind, int strArg) {
23+
sinkModel(_, _, _, _, _, _, _, kind, _, _) and
24+
exists(string strArgStr |
25+
(
26+
strArgStr.toInt() = strArg
27+
or
28+
strArg = -2 and
29+
strArgStr = "c"
30+
)
31+
|
32+
kind = "regex-use[" + strArgStr + "]"
33+
)
34+
or
35+
strArg = -3 and
36+
kind = "regex-use"
37+
}
1138

12-
Pattern() {
13-
exists(Function fn | fnName.matches("Match%") or fnName.matches("%Compile%") |
14-
fn.hasQualifiedName("regexp", fnName) and
15-
this = fn.getACall().getArgument(0)
39+
private class DefaultRegexpPattern extends RegexpPattern::Range, DataFlow::ArgumentNode {
40+
int strArg;
41+
42+
DefaultRegexpPattern() {
43+
exists(string kind |
44+
regexSinkKindInfo(kind, strArg) and
45+
sinkNode(this, kind)
1646
)
1747
}
1848

@@ -21,38 +51,37 @@ module Regexp {
2151
override string getPattern() { result = this.asExpr().getStringValue() }
2252

2353
override DataFlow::Node getAUse() {
24-
fnName.matches("MustCompile%") and
25-
result = this.getCall().getASuccessor*()
26-
or
27-
fnName.matches("Compile%") and
54+
strArg = -2 and
2855
result = this.getCall().getResult(0).getASuccessor*()
2956
or
3057
result = this
3158
}
3259
}
3360

34-
private class MatchFunction extends RegexpMatchFunction::Range, Function {
35-
MatchFunction() {
36-
exists(string fn | fn.matches("Match%") | this.hasQualifiedName("regexp", fn))
61+
private class DefaultRegexpMatchFunction extends RegexpMatchFunction::Range, Function {
62+
int patArg;
63+
int strArg;
64+
65+
DefaultRegexpMatchFunction() {
66+
exists(DefaultRegexpPattern drp, string kind |
67+
drp.getCall() = this.getACall() and
68+
sinkNode(drp, kind)
69+
|
70+
patArg = drp.getPosition() and
71+
regexSinkKindInfo(kind, strArg) and
72+
strArg >= -1
73+
)
3774
}
3875

39-
override FunctionInput getRegexpArg() { result.isParameter(0) }
40-
41-
override FunctionInput getValue() { result.isParameter(1) }
42-
43-
override FunctionOutput getResult() { result.isResult(0) }
44-
}
45-
46-
private class MatchMethod extends RegexpMatchFunction::Range, Method {
47-
MatchMethod() {
48-
exists(string fn | fn.matches("Match%") | this.hasQualifiedName("regexp", "Regexp", fn))
76+
override FunctionInput getRegexpArg() {
77+
patArg = -1 and result.isReceiver()
78+
or
79+
patArg >= 0 and result.isParameter(patArg)
4980
}
5081

51-
override FunctionInput getRegexpArg() { result.isReceiver() }
82+
override FunctionInput getValue() { result.isParameter(strArg) }
5283

53-
override FunctionInput getValue() { result.isParameter(0) }
54-
55-
override FunctionOutput getResult() { result.isResult() }
84+
override FunctionOutput getResult() { result.isResult(0) }
5685
}
5786

5887
private class ReplaceFunction extends RegexpReplaceFunction::Range, Method {
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
| file://:0:0:0:0 | Match | stdlib.go:16:30:16:37 | "posix?" | stdlib.go:28:11:28:24 | type conversion | stdlib.go:28:2:28:25 | call to Match |
2+
| file://:0:0:0:0 | Match | stdlib.go:28:2:28:3 | re | stdlib.go:28:11:28:24 | type conversion | stdlib.go:28:2:28:25 | call to Match |
23
| file://:0:0:0:0 | MatchReader | stdlib.go:16:30:16:37 | "posix?" | stdlib.go:29:17:29:37 | call to NewReader | stdlib.go:29:2:29:38 | call to MatchReader |
4+
| file://:0:0:0:0 | MatchReader | stdlib.go:29:2:29:3 | re | stdlib.go:29:17:29:37 | call to NewReader | stdlib.go:29:2:29:38 | call to MatchReader |
35
| file://:0:0:0:0 | MatchString | stdlib.go:16:30:16:37 | "posix?" | stdlib.go:17:17:17:22 | "posi" | stdlib.go:17:2:17:23 | call to MatchString |
6+
| file://:0:0:0:0 | MatchString | stdlib.go:17:2:17:3 | re | stdlib.go:17:17:17:22 | "posi" | stdlib.go:17:2:17:23 | call to MatchString |

go/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegexp/IncompleteHostnameRegexp.expected

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
1+
#select
2+
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | This regular expression has an unescaped dot before ')?example.com', so it might match more hosts than expected when $@. | IncompleteHostnameRegexp.go:12:38:12:39 | re | the regular expression is used |
3+
| main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:40:60:40:79 | "^test2.github.com$" | the regular expression is used |
4+
| main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:45:15:45:39 | `https://www.example.com` | the regular expression is used |
5+
| main.go:49:21:49:45 | `https://www.example.com` | main.go:49:21:49:45 | `https://www.example.com` | main.go:65:15:65:23 | localVar3 | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:65:15:65:23 | localVar3 | the regular expression is used |
6+
| main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:56:15:56:34 | ...+... | the regular expression is used |
7+
| main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:58:15:58:42 | ...+... | the regular expression is used |
18
edges
2-
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | provenance | |
9+
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | provenance | Sink:MaD:2 |
310
| main.go:49:21:49:45 | `https://www.example.com` | main.go:62:15:62:25 | sourceConst | provenance | |
4-
| main.go:62:15:62:25 | sourceConst | main.go:65:15:65:23 | localVar3 | provenance | |
11+
| main.go:62:15:62:25 | sourceConst | main.go:65:15:65:23 | localVar3 | provenance | Sink:MaD:1 |
12+
models
13+
| 1 | Sink: regexp; ; true; Match; ; ; Argument[0]; regex-use[1]; manual |
14+
| 2 | Sink: regexp; ; true; MatchString; ; ; Argument[0]; regex-use[1]; manual |
515
nodes
616
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | semmle.label | "^((www\|beta).)?example.com/" |
717
| IncompleteHostnameRegexp.go:12:38:12:39 | re | semmle.label | re |
@@ -13,10 +23,3 @@ nodes
1323
| main.go:62:15:62:25 | sourceConst | semmle.label | sourceConst |
1424
| main.go:65:15:65:23 | localVar3 | semmle.label | localVar3 |
1525
subpaths
16-
#select
17-
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | This regular expression has an unescaped dot before ')?example.com', so it might match more hosts than expected when $@. | IncompleteHostnameRegexp.go:12:38:12:39 | re | the regular expression is used |
18-
| main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:40:60:40:79 | "^test2.github.com$" | the regular expression is used |
19-
| main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:45:15:45:39 | `https://www.example.com` | the regular expression is used |
20-
| main.go:49:21:49:45 | `https://www.example.com` | main.go:49:21:49:45 | `https://www.example.com` | main.go:65:15:65:23 | localVar3 | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:65:15:65:23 | localVar3 | the regular expression is used |
21-
| main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:56:15:56:34 | ...+... | the regular expression is used |
22-
| main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:58:15:58:42 | ...+... | the regular expression is used |
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
Security/CWE-020/IncompleteHostnameRegexp.ql
1+
query: Security/CWE-020/IncompleteHostnameRegexp.ql
2+
postprocess: TestUtilities/PrettyPrintModels.ql

0 commit comments

Comments
 (0)