1
1
/**
2
2
* Library for parsing Swift regular expressions.
3
3
*
4
+ * See https://developer.apple.com/documentation/foundation/nsregularexpression
5
+ * for the regular expression syntax we aim to support.
6
+ *
4
7
* N.B. does not yet handle stripping whitespace and comments in regexes with
5
8
* the `x` (free-spacing) flag.
6
9
*/
@@ -9,6 +12,17 @@ import swift
9
12
private import RegexTracking
10
13
private import codeql.swift.regex.Regex
11
14
15
+ /**
16
+ * Holds if `char` is a mode character that can be used in a regular expression; the accepted set is the union of the two lists below.
17
+ * ```
18
+ * NSRegularExpression accepts: dim suwxDPSUW
19
+ * Regex accepts: imns x
20
+ * ```
21
+ */
22
+ private predicate availableRegexModeCharacter ( string char ) {
23
+ char = [ "d" , "i" , "m" , "n" , "s" , "u" , "w" , "x" , "D" , "P" , "S" , "U" , "W" ]
24
+ }
25
+
12
26
/**
13
27
* An `Expr` containing a regular expression term, that is, either
14
28
* a regular expression literal, or a string literal used in a context where
@@ -283,7 +297,7 @@ abstract class RegExp extends Expr {
283
297
private predicate flagGroupStartNoModes ( int start , int end ) {
284
298
this .isGroupStart ( start ) and
285
299
this .getChar ( start + 1 ) = "?" and // the character right after the group start must be a question mark
286
- this .getChar ( start + 2 ) in [ "i" , "x" , "s" , "m" , "w" ] and
300
+ availableRegexModeCharacter ( this .getChar ( start + 2 ) ) and // the character at `start + 2` must be a recognised mode character
287
301
end = start + 2 // `end` is the index of the character tested above, i.e. the first mode character
288
302
}
289
303
@@ -295,7 +309,7 @@ abstract class RegExp extends Expr {
295
309
this .flagGroupStartNoModes ( start , pos )
296
310
or
297
311
this .modeCharacter ( start , pos - 1 ) and
298
- this .getChar ( pos ) in [ "i" , "x" , "s" , "m" , "w" ]
312
+ availableRegexModeCharacter ( this .getChar ( pos ) )
299
313
}
300
314
301
315
/**
@@ -333,7 +347,10 @@ abstract class RegExp extends Expr {
333
347
or
334
348
c = "m" and result = "MULTILINE" // `^` and `$` also match beginning and end of lines
335
349
or
336
- c = "w" and result = "UNICODE" // Unicode UAX 29 word boundary mode
350
+ c = "w" and result = "UNICODEBOUNDARY" // Unicode UAX 29 word boundary mode
351
+ or
352
+ c = "u" and result = "UNICODE" // Unicode matching
353
+ // (other flags exist that are not translated here)
337
354
)
338
355
}
339
356
@@ -344,6 +361,7 @@ abstract class RegExp extends Expr {
344
361
* VERBOSE
345
362
* DOTALL
346
363
* MULTILINE
364
+ * UNICODEBOUNDARY
347
365
* UNICODE
348
366
*/
349
367
string getAMode ( ) {
0 commit comments