Skip to content

Commit 33a5ba0

Browse files
committed
Swift: Add explanatory comments and (minimal) support for additional regex mode flags.
1 parent 32a2930 commit 33a5ba0

File tree

2 files changed

+26
-5
lines changed

2 files changed

+26
-5
lines changed

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ private newtype TRegexParseMode =
120120
MkVerbose() or // ignores whitespace and `#` comments within patterns
121121
MkDotAll() or // dot matches all characters, including line terminators
122122
MkMultiLine() or // `^` and `$` also match beginning and end of lines
123-
MkUnicode() // Unicode UAX 29 word boundary mode
123+
MkUnicodeBoundary() or // Unicode UAX 29 word boundary mode
124+
MkUnicode() // Unicode matching
124125

125126
/**
126127
* A regular expression parse mode flag.
@@ -138,6 +139,8 @@ class RegexParseMode extends TRegexParseMode {
138139
or
139140
this = MkMultiLine() and result = "MULTILINE"
140141
or
142+
this = MkUnicodeBoundary() and result = "UNICODEBOUNDARY"
143+
or
141144
this = MkUnicode() and result = "UNICODE"
142145
}
143146

@@ -249,7 +252,7 @@ class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep
249252
.getMember()
250253
.(FieldDecl)
251254
.hasQualifiedName("NSRegularExpression.Options", "useUnicodeWordBoundaries") and
252-
mode = MkUnicode() and
255+
mode = MkUnicodeBoundary() and
253256
isSet = true
254257
}
255258
}

swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
/**
22
* Library for parsing Swift regular expressions.
33
*
4+
* See https://developer.apple.com/documentation/foundation/nsregularexpression
5+
* for the regular expression syntax we aim to support.
6+
*
47
* N.B. does not yet handle stripping whitespace and comments in regexes with
58
* the `x` (free-spacing) flag.
69
*/
@@ -9,6 +12,17 @@ import swift
912
private import RegexTracking
1013
private import codeql.swift.regex.Regex
1114

15+
/**
16+
* A mode character that can be used in a regular expression.
17+
* ```
18+
* NSRegularExpression accepts: dim suwxDPSUW
19+
* Regex accepts: imns x
20+
* ```
21+
*/
22+
private predicate availableRegexModeCharacter(string char) {
23+
char = ["d", "i", "m", "n", "s", "u", "w", "x", "D", "P", "S", "U", "W"]
24+
}
25+
1226
/**
1327
* An `Expr` containing a regular expression term, that is, either
1428
* a regular expression literal, or a string literal used in a context where
@@ -283,7 +297,7 @@ abstract class RegExp extends Expr {
283297
private predicate flagGroupStartNoModes(int start, int end) {
284298
this.isGroupStart(start) and
285299
this.getChar(start + 1) = "?" and
286-
this.getChar(start + 2) in ["i", "x", "s", "m", "w"] and
300+
availableRegexModeCharacter(this.getChar(start + 2)) and
287301
end = start + 2
288302
}
289303

@@ -295,7 +309,7 @@ abstract class RegExp extends Expr {
295309
this.flagGroupStartNoModes(start, pos)
296310
or
297311
this.modeCharacter(start, pos - 1) and
298-
this.getChar(pos) in ["i", "x", "s", "m", "w"]
312+
availableRegexModeCharacter(this.getChar(pos))
299313
}
300314

301315
/**
@@ -333,7 +347,10 @@ abstract class RegExp extends Expr {
333347
or
334348
c = "m" and result = "MULTILINE" // `^` and `$` also match beginning and end of lines
335349
or
336-
c = "w" and result = "UNICODE" // Unicode UAX 29 word boundary mode
350+
c = "w" and result = "UNICODEBOUNDARY" // Unicode UAX 29 word boundary mode
351+
or
352+
c = "u" and result = "UNICODE" // Unicode matching
353+
// (other flags exist that are not translated here)
337354
)
338355
}
339356

@@ -344,6 +361,7 @@ abstract class RegExp extends Expr {
344361
* VERBOSE
345362
* DOTALL
346363
* MULTILINE
364+
* UNICODEBOUNDARY
347365
* UNICODE
348366
*/
349367
string getAMode() {

0 commit comments

Comments
 (0)