Skip to content

Commit 8273fa1

Browse files
committed
Swift: Track parse modes (prototype version).
1 parent 5dea539 commit 8273fa1

File tree

4 files changed

+105
-8
lines changed

4 files changed

+105
-8
lines changed

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,65 @@ private class StandardRegexCreation extends RegexCreation {
6060
override DataFlow::Node getStringInput() { result = input }
6161
}
6262

63+
/**
 * The algebraic type behind `RegexParseMode`, with one branch per
 * regular expression parse mode flag.
 */
newtype TRegexParseMode =
  // case insensitive
  MkIgnoreCase() or
  // ignores whitespace and `#` comments within patterns
  MkVerbose() or
  // dot matches all characters, including line terminators
  MkDotAll() or
  // `^` and `$` also match beginning and end of lines
  MkMultiLine() or
  // Unicode UAX 29 word boundary mode
  MkUnicode()
69+
70+
/**
 * A regular expression parse mode flag, that is, one of the branches of
 * `TRegexParseMode`.
 */
class RegexParseMode extends TRegexParseMode {
  /**
   * Gets the name of this parse mode flag. Each flag has exactly one name:
   * `IGNORECASE`, `VERBOSE`, `DOTALL`, `MULTILINE` or `UNICODE`.
   */
  string toString() {
    this = MkIgnoreCase() and result = "IGNORECASE"
    or
    this = MkVerbose() and result = "VERBOSE"
    or
    this = MkDotAll() and result = "DOTALL"
    or
    // previously `MkUnicode()` was mapped to "MULTILINE", leaving
    // `MkMultiLine()` without any string representation
    this = MkMultiLine() and result = "MULTILINE"
    or
    // previously `MkIgnoreCase()` was (additionally) mapped to "UNICODE"
    this = MkUnicode() and result = "UNICODE"
  }
}
79+
80+
/**
 * A unit class whose subclasses contribute additional flow steps for
 * regular expressions.
 */
class RegexAdditionalFlowStep extends Unit {
  /**
   * Holds if the step from `nodeFrom` to `nodeTo` should be considered a
   * flow step for regular expressions.
   */
  abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);

  /**
   * Holds if the step from `nodeFrom` to `nodeTo` changes parse mode `mode`
   * of the regular expression. `isSet` is `true` when the step sets the
   * mode and `false` when the step unsets it.
   */
  abstract predicate modifiesParseMode(
    DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
  );
}
96+
97+
/**
 * An additional flow step for `Regex` or `NSRegularExpression`.
 *
 * Currently only calls to `Regex.dotMatchesNewlines(_:)` are modelled.
 */
class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
  /**
   * Holds if `nodeFrom` to `nodeTo` is a step that modifies some parse mode
   * (whichever mode, and whether it is set or unset).
   */
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    this.modifiesParseMode(nodeFrom, nodeTo, _, _)
  }

  /**
   * Holds if `nodeTo` is a call to `Regex.dotMatchesNewlines(_:)` on the
   * qualifier `nodeFrom`, `mode` is the `DOTALL` mode, and `isSet` reflects
   * the value passed to the call.
   */
  override predicate modifiesParseMode(
    DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
  ) {
    exists(CallExpr call |
      call.getStaticTarget().(Method).hasQualifiedName("Regex", "dotMatchesNewlines(_:)") and
      nodeFrom.asExpr() = call.getQualifier() and
      nodeTo.asExpr() = call and
      mode = MkDotAll() and
      // TODO: other methods
      // decode the value being set
      (
        // the argument is the literal `false`: the mode is unset
        call.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false and
        isSet = false
        or
        // the argument is `true`, is defaulted (= true), or has an unknown
        // value: treat the mode as set
        not call.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false and
        isSet = true
      )
    )
  }
}
121+
63122
/**
64123
* A call that evaluates a regular expression. For example, the call to `firstMatch` in:
65124
* ```

swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
*/
77

88
import swift
9+
private import RegexTracking
10+
private import codeql.swift.regex.Regex
911

1012
/**
1113
* A `Expr` containing a regular expression term, that is, either
@@ -324,7 +326,17 @@ abstract class RegExp extends Expr {
324326
* MULTILINE
325327
* UNICODE
326328
*/
327-
string getAMode() { result = this.getModeFromPrefix() }
329+
string getAMode() {
  // a mode flag applied to the regex object before it is evaluated: there
  // is parse mode flow into the regex input of an evaluation of this regex
  exists(RegexEval eval |
    eval.getARegex() = this and
    RegexParseModeFlow::flow(_, DataFlow::exprNode(eval.getRegexInput())) and
    result = "DOTALL" // TODO
  )
  or
  // a mode flag given inside the regex string itself
  result = this.getModeFromPrefix()
}
328340

329341
/**
330342
* Holds if the `i`th character could not be parsed.

swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import swift
88
import codeql.swift.regex.RegexTreeView
9-
private import codeql.swift.dataflow.DataFlow
9+
import codeql.swift.dataflow.DataFlow
1010
private import ParseRegex
1111
private import codeql.swift.regex.Regex
1212

@@ -37,7 +37,6 @@ private module RegexUseConfig implements DataFlow::ConfigSig {
3737
predicate isSource(DataFlow::Node node) {
3838
// creation of the regex
3939
node instanceof RegexCreation
40-
// TODO: track parse mode flags.
4140
}
4241

4342
predicate isSink(DataFlow::Node node) {
@@ -46,9 +45,36 @@ private module RegexUseConfig implements DataFlow::ConfigSig {
4645
}
4746

4847
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
49-
// TODO: flow through regex methods that return a modified regex.
50-
none()
48+
any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo)
5149
}
5250
}
5351

5452
module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
53+
54+
/**
 * A data flow configuration for tracking a regular expression from the
 * point where a parse mode flag is set on it to the point where it is
 * evaluated. Only the `MkDotAll()` mode is tracked at present.
 */
private module RegexParseModeConfig implements DataFlow::ConfigSig {
  /** Holds if `node` is the result of a step that sets the `MkDotAll()` mode. */
  predicate isSource(DataFlow::Node node) {
    exists(RegexAdditionalFlowStep s | s.modifiesParseMode(_, node, MkDotAll(), true))
  }

  /**
   * Holds if `node` is the result of a step that sets or unsets the
   * `MkDotAll()` mode, so that earlier values of the flag do not flow into it.
   */
  predicate isBarrierIn(DataFlow::Node node) {
    exists(RegexAdditionalFlowStep s | s.modifiesParseMode(_, node, MkDotAll(), _))
  }

  /** Holds if `node` is the regex input of a regular expression evaluation. */
  predicate isSink(DataFlow::Node node) {
    exists(RegexEval eval | node.asExpr() = eval.getRegexInput())
  }

  /** Holds if `nodeFrom` to `nodeTo` is an additional regular expression flow step. */
  predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(RegexAdditionalFlowStep s | s.step(nodeFrom, nodeTo))
  }
}

/** Global data flow for `RegexParseModeConfig`. */
module RegexParseModeFlow = DataFlow::Global<RegexParseModeConfig>;

swift/ql/test/library-tests/regex/regex.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,11 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
210210
_ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc
211211
_ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc
212212

213-
_ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL
213+
_ = try Regex("abc").dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL
214214
_ = try Regex("abc").dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc
215215
_ = try Regex("abc").dotMatchesNewlines(true).dotMatchesNewlines(false).firstMatch(in: input) // $ input=input regex=abc
216-
_ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc MISSING: modes=DOTALL
217-
_ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc MISSING: modes="DOTALL | IGNORECASE"
216+
_ = try Regex("abc").dotMatchesNewlines(false).dotMatchesNewlines(true).firstMatch(in: input) // $ input=input regex=abc modes=DOTALL
217+
_ = try Regex("abc").dotMatchesNewlines().ignoresCase().firstMatch(in: input) // $ input=input regex=abc SPURIOUS: modes=DOTALL MISSING: modes="DOTALL | IGNORECASE"
218218

219219
_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE
220220
_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL

0 commit comments

Comments
 (0)