Skip to content

Commit 200d9a4

Browse files
committed
Swift: Port regex mode flag character fix from Python.
1 parent df60f56 commit 200d9a4

File tree

5 files changed

+47
-46
lines changed

5 files changed

+47
-46
lines changed

swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -277,35 +277,54 @@ abstract class RegExp extends Expr {
277277
private predicate isGroupStart(int i) { this.nonEscapedCharAt(i) = "(" and not this.inCharSet(i) }
278278

279279
/**
280-
* Holds if a parse mode starts between `start` and `end`.
280+
* Holds if the initial part of a parse mode, not containing any
281+
* mode characters, is between `start` and `end`.
281282
*/
282-
private predicate flagGroupStart(int start, int end) {
283+
private predicate flagGroupStartNoModes(int start, int end) {
283284
this.isGroupStart(start) and
284285
this.getChar(start + 1) = "?" and
285286
this.getChar(start + 2) in ["i", "x", "s", "m", "w"] and
286287
end = start + 2
287288
}
288289

289290
/**
290-
* Holds if a parse mode group is between `start` and `end`, and includes the
291-
* mode flag `c`. For example the following span, with mode flag `i`:
291+
* Holds if `pos` contains a mode character from the
292+
* flag group starting at `start`.
293+
*/
294+
private predicate modeCharacter(int start, int pos) {
295+
this.flagGroupStartNoModes(start, pos)
296+
or
297+
this.modeCharacter(start, pos - 1) and
298+
this.getChar(pos) in ["i", "x", "s", "m", "w"]
299+
}
300+
301+
/**
302+
* Holds if a parse mode group is between `start` and `end`.
303+
*/
304+
private predicate flagGroupStart(int start, int end) {
305+
this.flagGroupStartNoModes(start, _) and
306+
end = max(int i | this.modeCharacter(start, i) | i + 1)
307+
}
308+
309+
/**
310+
* Holds if a parse mode group of this regex includes the mode flag `c`.
311+
* For example, the following parse mode group, with mode flag `i`:
292312
* ```
293313
* (?i)
294314
* ```
295315
*/
296-
private predicate flagGroup(int start, int end, string c) {
297-
exists(int inStart, int inEnd |
298-
this.flagGroupStart(start, inStart) and
299-
this.groupContents(start, end, inStart, inEnd) and
300-
this.getChar([inStart .. inEnd - 1]) = c
316+
private predicate flag(string c) {
317+
exists(int pos |
318+
this.modeCharacter(_, pos) and
319+
this.getChar(pos) = c
301320
)
302321
}
303322

304323
/**
305324
* Gets a mode of this regular expression string if it is defined by a mode prefix.
306325
*/
307326
string getModeFromPrefix() {
308-
exists(string c | this.flagGroup(_, _, c) |
327+
exists(string c | this.flag(c) |
309328
c = "i" and result = "IGNORECASE" // case insensitive
310329
or
311330
c = "x" and result = "VERBOSE" // ignores whitespace and `#` comments within patterns

swift/ql/test/library-tests/regex/parse.expected

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,16 +1618,13 @@ redos_variants.swift:
16181618

16191619
# 142| [RegExpConstant, RegExpNormalChar] !
16201620

1621-
# 146| [RegExpGroup] (?s)
1622-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
1621+
# 146| [RegExpZeroWidthMatch] (?s)
16231622

16241623
# 146| [RegExpSequence] (?s)(.|\n)*!
1625-
#-----| 0 -> [RegExpGroup] (?s)
1624+
#-----| 0 -> [RegExpZeroWidthMatch] (?s)
16261625
#-----| 1 -> [RegExpStar] (.|\n)*
16271626
#-----| 2 -> [RegExpConstant, RegExpNormalChar] !
16281627

1629-
# 146| [RegExpConstant, RegExpNormalChar] s
1630-
16311628
# 146| [RegExpGroup] (.|\n)
16321629
#-----| 0 -> [RegExpAlt] .|\n
16331630

@@ -6492,61 +6489,49 @@ regex.swift:
64926489

64936490
# 206| [RegExpNamedCharacterProperty] [:aaaaa:]
64946491

6495-
# 211| [RegExpGroup] (?i)
6496-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
6492+
# 211| [RegExpZeroWidthMatch] (?i)
64976493

64986494
# 211| [RegExpSequence] (?i)abc
6499-
#-----| 0 -> [RegExpGroup] (?i)
6495+
#-----| 0 -> [RegExpZeroWidthMatch] (?i)
65006496
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
65016497

6502-
# 211| [RegExpConstant, RegExpNormalChar] i
6503-
65046498
# 211| [RegExpConstant, RegExpNormalChar] abc
65056499

6506-
# 212| [RegExpGroup] (?s)
6507-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] s
6500+
# 212| [RegExpZeroWidthMatch] (?s)
65086501

65096502
# 212| [RegExpSequence] (?s)abc
6510-
#-----| 0 -> [RegExpGroup] (?s)
6503+
#-----| 0 -> [RegExpZeroWidthMatch] (?s)
65116504
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
65126505

6513-
# 212| [RegExpConstant, RegExpNormalChar] s
6514-
65156506
# 212| [RegExpConstant, RegExpNormalChar] abc
65166507

6517-
# 213| [RegExpGroup] (?is)
6518-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] is
6508+
# 213| [RegExpZeroWidthMatch] (?is)
65196509

65206510
# 213| [RegExpSequence] (?is)abc
6521-
#-----| 0 -> [RegExpGroup] (?is)
6511+
#-----| 0 -> [RegExpZeroWidthMatch] (?is)
65226512
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
65236513

6524-
# 213| [RegExpConstant, RegExpNormalChar] is
6525-
65266514
# 213| [RegExpConstant, RegExpNormalChar] abc
65276515

65286516
# 214| [RegExpGroup] (?i-s)
6529-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i-s
6517+
#-----| 0 -> [RegExpConstant, RegExpNormalChar] -s
65306518

65316519
# 214| [RegExpSequence] (?i-s)abc
65326520
#-----| 0 -> [RegExpGroup] (?i-s)
65336521
#-----| 1 -> [RegExpConstant, RegExpNormalChar] abc
65346522

6535-
# 214| [RegExpConstant, RegExpNormalChar] i-s
6523+
# 214| [RegExpConstant, RegExpNormalChar] -s
65366524

65376525
# 214| [RegExpConstant, RegExpNormalChar] abc
65386526

65396527
# 217| [RegExpConstant, RegExpNormalChar] abc
65406528

65416529
# 217| [RegExpSequence] abc(?i)def
65426530
#-----| 0 -> [RegExpConstant, RegExpNormalChar] abc
6543-
#-----| 1 -> [RegExpGroup] (?i)
6531+
#-----| 1 -> [RegExpZeroWidthMatch] (?i)
65446532
#-----| 2 -> [RegExpConstant, RegExpNormalChar] def
65456533

6546-
# 217| [RegExpGroup] (?i)
6547-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
6548-
6549-
# 217| [RegExpConstant, RegExpNormalChar] i
6534+
# 217| [RegExpZeroWidthMatch] (?i)
65506535

65516536
# 217| [RegExpConstant, RegExpNormalChar] def
65526537

@@ -6558,16 +6543,13 @@ regex.swift:
65586543
#-----| 2 -> [RegExpConstant, RegExpNormalChar] ghi
65596544

65606545
# 218| [RegExpGroup] (?i:def)
6561-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i:def
6546+
#-----| 0 -> [RegExpConstant, RegExpNormalChar] :def
65626547

6563-
# 218| [RegExpConstant, RegExpNormalChar] i:def
6548+
# 218| [RegExpConstant, RegExpNormalChar] :def
65646549

65656550
# 218| [RegExpConstant, RegExpNormalChar] ghi
65666551

6567-
# 219| [RegExpGroup] (?i)
6568-
#-----| 0 -> [RegExpConstant, RegExpNormalChar] i
6569-
6570-
# 219| [RegExpConstant, RegExpNormalChar] i
6552+
# 219| [RegExpZeroWidthMatch] (?i)
65716553

65726554
# 219| [RegExpConstant, RegExpNormalChar] abc
65736555

swift/ql/test/library-tests/regex/regex.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
211211
_ = try Regex("(?i)abc").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=(?i)abc
212212
_ = try Regex("(?s)abc").firstMatch(in: input) // $ input=input modes=DOTALL regex=(?s)abc
213213
_ = try Regex("(?is)abc").firstMatch(in: input) // $ input=input modes="DOTALL | IGNORECASE" regex=(?is)abc
214-
_ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc MISSING: modes=IGNORECASE SPURIOUS: modes="DOTALL | IGNORECASE"
214+
_ = try Regex("(?i-s)abc").firstMatch(in: input) // $ input=input regex=(?i-s)abc modes=IGNORECASE
215215

216216
// these cases use parse modes on localized areas of the regex, which we don't currently support
217217
_ = try Regex("abc(?i)def").firstMatch(in: input) // $ input=input modes=IGNORECASE regex=abc(?i)def

swift/ql/test/query-tests/Security/CWE-1333/ReDoS.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
| ReDoS.swift:65:22:65:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
33
| ReDoS.swift:66:22:66:22 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
44
| ReDoS.swift:69:18:69:18 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
5-
| ReDoS.swift:73:26:73:33 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'isx' and containing many repetitions of '\\n'. |
5+
| ReDoS.swift:73:26:73:33 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings starting with 'x' and containing many repetitions of '\\n'. |
66
| ReDoS.swift:77:46:77:46 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
77
| ReDoS.swift:79:57:79:57 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
88
| ReDoS.swift:82:57:82:57 | a* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |

swift/ql/test/query-tests/Security/CWE-1333/ReDoS.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func myRegexpTests(myUrl: URL) throws {
7070
let regex = try Regex(str)
7171
_ = try regex.firstMatch(in: tainted)
7272

73-
_ = try Regex(#"(?is)X(?:.|\n)*Y"#) // BAD - suggested attack should begin with 'x' or 'X', *not* 'isx' or 'isX' [WRONG]
73+
_ = try Regex(#"(?is)X(?:.|\n)*Y"#) // BAD - suggested attack should begin with 'x' or 'X', *not* 'isx' or 'isX'
7474

7575
// NSRegularExpression
7676

0 commit comments

Comments
 (0)