Skip to content

Commit a58dbf2

Browse files
authored
Merge pull request #13759 from geoffw0/parsemode2
Swift: Refactor regex library
2 parents 9b0d7f3 + 5dea539 commit a58dbf2

File tree

4 files changed

+107
-34
lines changed

4 files changed

+107
-34
lines changed

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,51 +6,58 @@ import swift
66
import codeql.swift.regex.RegexTreeView
77
private import codeql.swift.dataflow.DataFlow
88
private import internal.ParseRegex
9+
private import internal.RegexTracking
910

1011
/**
11-
* A data flow configuration for tracking string literals that are used as
12-
* regular expressions.
12+
* A string literal that is used as a regular expression. For example
13+
* the string literal `"(a|b).*"` in:
14+
* ```
15+
* Regex("(a|b).*").firstMatch(in: myString)
16+
* ```
1317
*/
14-
private module RegexUseConfig implements DataFlow::ConfigSig {
15-
predicate isSource(DataFlow::Node node) { node.asExpr() instanceof StringLiteralExpr }
18+
private class ParsedStringRegex extends RegExp, StringLiteralExpr {
19+
DataFlow::Node use;
20+
21+
ParsedStringRegex() { StringLiteralUseFlow::flow(DataFlow::exprNode(this), use) }
22+
23+
/**
24+
* Gets a dataflow node where this string literal is used as a regular
25+
* expression.
26+
*/
27+
DataFlow::Node getUse() { result = use }
28+
}
1629

17-
predicate isSink(DataFlow::Node node) { node.asExpr() = any(RegexEval eval).getRegexInput() }
30+
/**
31+
* A data-flow node where a regular expression object is created.
32+
*/
33+
abstract class RegexCreation extends DataFlow::Node {
34+
/**
35+
* Gets a dataflow node for the string that the regular expression object is
36+
* created from.
37+
*/
38+
abstract DataFlow::Node getStringInput();
39+
}
40+
41+
/**
42+
* A data-flow node where a `Regex` or `NSRegularExpression` object is created.
43+
*/
44+
private class StandardRegexCreation extends RegexCreation {
45+
DataFlow::Node input;
1846

19-
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
20-
// flow through `Regex` initializer, i.e. from a string to a `Regex` object.
47+
StandardRegexCreation() {
2148
exists(CallExpr call |
2249
(
2350
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or
2451
call.getStaticTarget()
2552
.(Method)
2653
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
2754
) and
28-
nodeFrom.asExpr() = call.getArgument(0).getExpr() and
29-
nodeTo.asExpr() = call
55+
input.asExpr() = call.getArgument(0).getExpr() and
56+
this.asExpr() = call
3057
)
3158
}
32-
}
33-
34-
private module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
35-
36-
/**
37-
* A string literal that is used as a regular expression in a regular
38-
* expression evaluation. For example the string literal `"(a|b).*"` in:
39-
* ```
40-
* Regex("(a|b).*").firstMatch(in: myString)
41-
* ```
42-
*/
43-
private class ParsedStringRegex extends RegExp, StringLiteralExpr {
44-
RegexEval eval;
45-
46-
ParsedStringRegex() {
47-
RegexUseFlow::flow(DataFlow::exprNode(this), DataFlow::exprNode(eval.getRegexInput()))
48-
}
4959

50-
/**
51-
* Gets a call that evaluates this regular expression.
52-
*/
53-
RegexEval getEval() { result = eval }
60+
override DataFlow::Node getStringInput() { result = input }
5461
}
5562

5663
/**
@@ -61,7 +68,8 @@ private class ParsedStringRegex extends RegExp, StringLiteralExpr {
6168
*/
6269
abstract class RegexEval extends CallExpr {
6370
/**
64-
* Gets the input to this call that is the regular expression being evaluated.
71+
* Gets the input to this call that is the regular expression being evaluated. This may
72+
* be a regular expression object or a string literal.
6573
*/
6674
abstract Expr getRegexInput();
6775

@@ -73,7 +81,16 @@ abstract class RegexEval extends CallExpr {
7381
/**
7482
* Gets a regular expression value that is evaluated here (if any can be identified).
7583
*/
76-
RegExp getARegex() { result.(ParsedStringRegex).getEval() = this }
84+
RegExp getARegex() {
85+
// string literal used directly as a regex
86+
result.(ParsedStringRegex).getUse().asExpr() = this.getRegexInput()
87+
or
88+
// string literal -> regex object -> use
89+
exists(RegexCreation regexCreation |
90+
result.(ParsedStringRegex).getUse() = regexCreation.getStringInput() and
91+
RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput()))
92+
)
93+
}
7794
}
7895

7996
/**
swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/**
2+
* Provides classes and predicates that track strings and regular expressions
3+
* to where they are used, along with properties of the regex such as parse
4+
* mode flags that have been set.
5+
*/
6+
7+
import swift
8+
import codeql.swift.regex.RegexTreeView
9+
private import codeql.swift.dataflow.DataFlow
10+
private import ParseRegex
11+
private import codeql.swift.regex.Regex
12+
13+
/**
14+
* A data flow configuration for tracking string literals that are used to
15+
* create regular expression objects, or are evaluated directly as regular
16+
* expressions.
17+
*/
18+
private module StringLiteralUseConfig implements DataFlow::ConfigSig {
19+
predicate isSource(DataFlow::Node node) { node.asExpr() instanceof StringLiteralExpr }
20+
21+
predicate isSink(DataFlow::Node node) {
22+
// evaluated directly as a regular expression
23+
node.asExpr() = any(RegexEval eval).getRegexInput()
24+
or
25+
// used to create a regular expression object
26+
node = any(RegexCreation regexCreation).getStringInput()
27+
}
28+
}
29+
30+
module StringLiteralUseFlow = DataFlow::Global<StringLiteralUseConfig>;
31+
32+
/**
33+
* A data flow configuration for tracking regular expression objects from
34+
* creation to the point of use.
35+
*/
36+
private module RegexUseConfig implements DataFlow::ConfigSig {
37+
predicate isSource(DataFlow::Node node) {
38+
// creation of the regex
39+
node instanceof RegexCreation
40+
// TODO: track parse mode flags.
41+
}
42+
43+
predicate isSink(DataFlow::Node node) {
44+
// evaluation of the regex
45+
node.asExpr() = any(RegexEval eval).getRegexInput()
46+
}
47+
48+
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
49+
// TODO: flow through regex methods that return a modified regex.
50+
none()
51+
}
52+
}
53+
54+
module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
swift/ql/test/query-tests/Security/CWE-1333/ReDoS.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
| ReDoS.swift:64:22:64:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
12
| ReDoS.swift:65:22:65:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
23
| ReDoS.swift:66:22:66:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
34
| ReDoS.swift:69:18:69:18 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
5+
| ReDoS.swift:75:46:75:46 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
46
| ReDoS.swift:77:57:77:57 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
57
| ReDoS.swift:80:57:80:57 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

swift/ql/test/query-tests/Security/CWE-1333/ReDoS.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func myRegexpTests(myUrl: URL) throws {
6161
// Regex
6262

6363
_ = "((a*)*b)" // GOOD (never used)
64-
_ = try Regex("((a*)*b)") // DUBIOUS (never used)
64+
_ = try Regex("((a*)*b)") // DUBIOUS (never used) [FLAGGED]
6565
_ = try Regex("((a*)*b)").firstMatch(in: untainted) // DUBIOUS (never used on tainted input) [FLAGGED]
6666
_ = try Regex("((a*)*b)").firstMatch(in: tainted) // BAD
6767
_ = try Regex(".*").firstMatch(in: tainted) // GOOD (safe regex)
@@ -72,7 +72,7 @@ func myRegexpTests(myUrl: URL) throws {
7272

7373
// NSRegularExpression
7474

75-
_ = try? NSRegularExpression(pattern: "((a*)*b)") // DUBIOUS (never used)
75+
_ = try? NSRegularExpression(pattern: "((a*)*b)") // DUBIOUS (never used) [FLAGGED]
7676

7777
let nsregex1 = try? NSRegularExpression(pattern: "((a*)*b)") // DUBIOUS (never used on tainted input) [FLAGGED]
7878
_ = nsregex1?.stringByReplacingMatches(in: untainted, range: NSRange(location: 0, length: untainted.utf16.count), withTemplate: "")

0 commit comments

Comments
 (0)