Skip to content

Commit b5a8a8d

Browse files
authored
Merge pull request github#13715 from geoffw0/parsemode
Swift: Recognize regular expression parse mode flags
2 parents a426010 + 2b9d25b commit b5a8a8d

File tree

6 files changed

+293
-74
lines changed

6 files changed

+293
-74
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The regular expression library now understands mode flags specified at the beginning of a regular expression (for example `(?is)`).

swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,17 @@ abstract class RegExp extends Expr {
1616
/**
1717
* Holds if this `RegExp` has the `s` flag, which makes `.` match all characters, including line terminators.
1818
*/
19-
predicate isDotAll() { none() }
19+
predicate isDotAll() { this.getAMode() = "DOTALL" }
2020

2121
/**
2222
* Holds if this `RegExp` has the `i` flag for case-insensitive matching.
2323
*/
24-
predicate isIgnoreCase() { none() }
24+
predicate isIgnoreCase() { this.getAMode() = "IGNORECASE" }
2525

2626
/**
27-
* Gets the flags for this `RegExp`, or the empty string if it has no flags.
27+
* Gets a string representing the flags for this `RegExp`, or the empty string if it has no flags.
2828
*/
29-
string getFlags() { result = "" }
29+
string getFlags() { result = concat(string mode | mode = this.getAMode() | mode, " | ") }
3030

3131
/**
3232
* Helper predicate for `charSetStart(int start, int end)`.
@@ -274,6 +274,58 @@ abstract class RegExp extends Expr {
274274

275275
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
276276

277+
/**
278+
* Holds if a parse mode group starts between `start` and `end`, that is, `(?` at `start` followed by a mode flag character at index `end`.
279+
*/
280+
private predicate flagGroupStart(int start, int end) {
281+
this.isGroupStart(start) and
282+
this.getChar(start + 1) = "?" and
283+
this.getChar(start + 2) in ["i", "x", "s", "m", "w"] and
284+
end = start + 2
285+
}
286+
287+
/**
288+
* Holds if a parse mode group is between `start` and `end`, and includes the
289+
* mode flag `c`. For example the following span, with mode flag `i`:
290+
* ```
291+
* (?i)
292+
* ```
293+
*/
294+
private predicate flagGroup(int start, int end, string c) {
295+
exists(int inStart, int inEnd |
296+
this.flagGroupStart(start, inStart) and
297+
this.groupContents(start, end, inStart, inEnd) and
298+
this.getChar([inStart .. inEnd - 1]) = c
299+
)
300+
}
301+
302+
/**
303+
* Gets a mode of this regular expression string if it is defined by a mode prefix.
304+
*/
305+
string getModeFromPrefix() {
306+
exists(string c | this.flagGroup(_, _, c) |
307+
c = "i" and result = "IGNORECASE" // case insensitive
308+
or
309+
c = "x" and result = "VERBOSE" // ignores whitespace and `#` comments within patterns
310+
or
311+
c = "s" and result = "DOTALL" // dot matches all characters, including line terminators
312+
or
313+
c = "m" and result = "MULTILINE" // `^` and `$` also match beginning and end of lines
314+
or
315+
c = "w" and result = "UNICODE" // Unicode UAX 29 word boundary mode
316+
)
317+
}
318+
319+
/**
320+
* Gets a mode (if any) of this regular expression. Can be any of:
321+
* IGNORECASE
322+
* VERBOSE
323+
* DOTALL
324+
* MULTILINE
325+
* UNICODE
326+
*/
327+
string getAMode() { result = this.getModeFromPrefix() }
328+
277329
/**
278330
* Holds if the `i`th character could not be parsed.
279331
*/
@@ -653,6 +705,8 @@ abstract class RegExp extends Expr {
653705
this.commentGroupStart(start, end)
654706
or
655707
this.simpleGroupStart(start, end)
708+
or
709+
this.flagGroupStart(start, end)
656710
}
657711

658712
/** Matches the start of a non-capturing group, e.g. `(?:` */

0 commit comments

Comments
 (0)