@@ -16,17 +16,17 @@ abstract class RegExp extends Expr {
16
16
/**
 * Holds if `DOTALL` is one of the modes of this `RegExp`, that is, the `s` flag
 * is in effect.
 */
predicate isDotAll() { "DOTALL" = this.getAMode() }
20
20
21
21
/**
 * Holds if `IGNORECASE` is one of the modes of this `RegExp`, that is, the `i`
 * flag for case-insensitive matching is in effect.
 */
predicate isIgnoreCase() { "IGNORECASE" = this.getAMode() }
25
25
26
26
/**
 * Gets every mode of this `RegExp` joined into a single string, with ` | `
 * between entries. The result is the empty string if the `RegExp` has no modes.
 */
string getFlags() {
  result = concat(string flag | flag = this.getAMode() | flag, " | ")
}
30
30
31
31
/**
32
32
* Helper predicate for `charSetStart(int start, int end)`.
@@ -274,6 +274,61 @@ abstract class RegExp extends Expr {
274
274
275
275
private predicate isGroupStart(int i) {
  // Position `i` opens a group when `nonEscapedCharAt` reports a `(` there and
  // `inCharSet` does not hold for that position.
  not this.inCharSet(i) and
  this.nonEscapedCharAt(i) = "("
}
276
276
277
/**
 * Holds if `pos` is the position of a mode character in the mode prefix that
 * opens at `start`. The first mode character sits directly after the `(?` at
 * `start`, and every mode character that immediately follows another one belongs
 * to the same prefix. For example, in `(?is)one` this holds for `start = 0` with
 * `pos = 2` and `pos = 3`.
 */
private predicate modePrefixChar(int start, int pos) {
  this.getChar(pos) in ["i", "m", "s", "u", "x", "U"] and
  (
    // Base case: the character right after `(?`.
    this.isGroupStart(start) and
    this.getChar(start + 1) = "?" and
    pos = start + 2
    or
    // Recursive case: the previous character is a mode character of the same prefix.
    this.modePrefixChar(start, pos - 1)
  )
}

/**
 * Holds if `start` and `end` are the range of the mode prefix substring (if any) of this
 * regular expression, and `c` is a mode prefix character specified in it. `end` is the
 * position immediately after the last mode character. For example, in the following
 * regular expression, `start` is `0`, `end` is `3` and `c` is `i`.
 * ```
 * (?i)one|two
 * ```
 * A prefix may name several modes at once. In the following regular expression,
 * `start` is `0`, `end` is `4` and `c` is either `i` or `s`.
 * ```
 * (?is)one|two
 * ```
 */
private predicate flagGroupStart(int start, int end, string c) {
  // `c` ranges over every mode character of the prefix, not just the first one.
  exists(int pos | this.modePrefixChar(start, pos) | c = this.getChar(pos)) and
  // `end` is one past the last mode character of the prefix.
  end = max(int pos | this.modePrefixChar(start, pos) | pos + 1)
}
293
+
294
/**
 * Gets the name of a mode that a mode prefix (such as `(?i)`) specifies for this
 * regular expression string, if there is such a prefix.
 */
string getModeFromPrefix() {
  // TODO: confirm that this flag-to-mode mapping is correct for Swift.
  this.flagGroupStart(_, _, "i") and result = "IGNORECASE"
  or
  this.flagGroupStart(_, _, "m") and result = "MULTILINE"
  or
  this.flagGroupStart(_, _, "s") and result = "DOTALL"
  or
  this.flagGroupStart(_, _, "u") and result = "UNICODE"
  or
  this.flagGroupStart(_, _, "x") and result = "VERBOSE"
  or
  this.flagGroupStart(_, _, "U") and result = "UNICODECLASS"
}
313
+
314
/**
 * Gets a mode (if any) of this regular expression. Can be any of:
 * DEBUG
 * IGNORECASE
 * MULTILINE
 * DOTALL
 * UNICODE
 * VERBOSE
 * UNICODECLASS
 *
 * At present the only source of modes is `getModeFromPrefix`, whose mapping
 * never produces `DEBUG`.
 */
string getAMode() {
  // TODO: also report modes supplied where the regular expression is used:
  //   result != "None" and
  //   usedAsRegex(this, result, _)
  //   or
  result = this.getModeFromPrefix()
}
331
+
277
332
/**
278
333
* Holds if the `i`th character could not be parsed.
279
334
*/
0 commit comments