@@ -25,8 +25,8 @@ struct Regex<Output> : RegexComponent {
25
25
26
26
init ( _ pattern: String ) throws where Output == AnyRegexOutput { }
27
27
28
- func ignoresCase( _ ignoresCase: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
29
- func dotMatchesNewlines( _ dotMatchesNewlines: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
28
+ func ignoresCase( _ ignoresCase: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
29
+ func dotMatchesNewlines( _ dotMatchesNewlines: Bool = true ) -> Regex < Regex < Output > . RegexOutput > { return self }
30
30
31
31
func firstMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
32
32
@@ -56,7 +56,8 @@ class NSRegularExpression : NSObject {
56
56
// Option set accepted by the `options:` argument of the NSRegularExpression initialiser used in the tests below.
// NOTE(review): the getters return fixed raw values, so this looks like a minimal test stub rather than the real Foundation type — confirm.
struct Options : OptionSet {
57
57
var rawValue : UInt
58
58
59
- static var caseInsensitive : NSRegularExpression . Options { get { return Options ( rawValue: 1 ) } }
59
// Bit 0. `1 << 0` is the same value as the removed literal `1`; it is written as a shift so it lines up with the flag added below.
+ static var caseInsensitive : NSRegularExpression . Options { get { return Options ( rawValue: 1 << 0 ) } }
60
// Bit 1. New flag; the added NSRegularExpression test cases pass it alone or combined with `caseInsensitive`.
+ static var dotMatchesLineSeparators : NSRegularExpression . Options { get { return Options ( rawValue: 1 << 1 ) } }
60
61
}
61
62
62
63
struct MatchingOptions : OptionSet {
@@ -111,62 +112,106 @@ func myRegexpVariantsTests(myUrl: URL) throws {
111
112
_ = try re9. firstMatch ( in: tainted)
112
113
113
114
// BAD - does not match double quotes for attribute values
114
- let re10 = try Regex ( #"<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
115
- _ = try re10. firstMatch ( in: tainted)
115
+ let re10a = try Regex ( #"(?is)<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# )
116
+ _ = try re10a. firstMatch ( in: tainted)
117
+ // BAD - does not match double quotes for attribute values
118
+ let re10b = try Regex ( #"<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
119
+ _ = try re10b. firstMatch ( in: tainted)
120
+ // BAD - does not match double quotes for attribute values
121
+ let options10 : NSRegularExpression . Options = [ . caseInsensitive, . dotMatchesLineSeparators]
122
+ let ns10 = try NSRegularExpression ( pattern: #"<script(\s|\w|=|')*?>.*?<\/script[^>]*>"# , options: options10)
123
+ _ = ns10. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
116
124
117
125
// BAD - does not match tabs between attributes
118
- let re11 = try Regex ( #"<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
119
- _ = try re11. firstMatch ( in: tainted)
126
+ let re11a = try Regex ( #"(?is)<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# )
127
+ _ = try re11a. firstMatch ( in: tainted)
128
+ // BAD - does not match tabs between attributes
129
+ let re11b = try Regex ( #"<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# ) . ignoresCase ( true ) . dotMatchesNewlines ( true )
130
+ _ = try re11b. firstMatch ( in: tainted)
131
+ // BAD - does not match tabs between attributes
132
+ let options11 : NSRegularExpression . Options = [ . caseInsensitive, . dotMatchesLineSeparators]
133
+ let ns11 = try NSRegularExpression ( pattern: #"<script( |\n|\w|=|'|")*?>.*?<\/script[^>]*>"# , options: options11)
134
+ _ = ns11. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
120
135
121
136
// BAD - does not match uppercase SCRIPT tags
122
- let re12 = try Regex ( #"<script.*?>.*?<\/script[^>]*>"# ) . dotMatchesNewlines ( true )
123
- _ = try re12. firstMatch ( in: tainted)
137
+ let re12a = try Regex ( #"(?s)<script.*?>.*?<\/script[^>]*>"# )
138
+ _ = try re12a. firstMatch ( in: tainted)
139
+ // BAD - does not match uppercase SCRIPT tags
140
+ let re12b = try Regex ( #"<script.*?>.*?<\/script[^>]*>"# ) . dotMatchesNewlines ( true )
141
+ _ = try re12b. firstMatch ( in: tainted)
142
+ // BAD - does not match uppercase SCRIPT tags
143
+ let ns12 = try NSRegularExpression ( pattern: #"<script.*?>.*?<\/script[^>]*>"# , options: . dotMatchesLineSeparators)
144
+ _ = ns12. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
124
145
125
146
// BAD - does not match mixed case script tags
126
- let re13 = try Regex ( #"<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# ) . dotMatchesNewlines ( true )
127
- _ = try re13. firstMatch ( in: tainted)
147
+ let re13a = try Regex ( #"(?s)<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# )
148
+ _ = try re13a. firstMatch ( in: tainted)
149
+ // BAD - does not match mixed case script tags
150
+ let re13b = try Regex ( #"<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# ) . dotMatchesNewlines ( true )
151
+ _ = try re13b. firstMatch ( in: tainted)
152
+ // BAD - does not match mixed case script tags
153
+ let ns13 = try NSRegularExpression ( pattern: #"<(script|SCRIPT).*?>.*?<\/(script|SCRIPT)[^>]*>"# , options: . dotMatchesLineSeparators)
154
+ _ = ns13. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
128
155
129
156
// BAD - doesn't match newlines in the end tag
130
- let re14 = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script.*>"# ) . ignoresCase ( true )
131
- _ = try re14. firstMatch ( in: tainted)
157
+ let re14a = try Regex ( #"(?i)<script[^>]*?>[\s\S]*?<\/script.*>"# )
158
+ _ = try re14a. firstMatch ( in: tainted)
159
+ // BAD - doesn't match newlines in the end tag
160
+ let re14b = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script.*>"# ) . ignoresCase ( true )
161
+ _ = try re14b. firstMatch ( in: tainted)
162
+ // BAD - doesn't match newlines in the end tag
163
+ let ns14 = try NSRegularExpression ( pattern: #"<script[^>]*?>[\s\S]*?<\/script.*>"# , options: . caseInsensitive)
164
+ _ = ns14. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
132
165
133
166
// GOOD
134
- let re15 = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# ) . ignoresCase ( true )
135
- _ = try re15. firstMatch ( in: tainted)
167
+ let re15a = try Regex ( #"(?i)<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# )
168
+ _ = try re15a. firstMatch ( in: tainted)
169
+ // GOOD
170
+ let re15b = try Regex ( #"<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# ) . ignoresCase ( true )
171
+ _ = try re15b. firstMatch ( in: tainted)
172
+ // GOOD
173
+ let ns15 = try NSRegularExpression ( pattern: #"<script[^>]*?>[\s\S]*?<\/script[^>]*?>"# , options: . caseInsensitive)
174
+ _ = ns15. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
136
175
137
176
// BAD - doesn't match comments with the right capture groups
138
177
let re16 = try Regex ( #"<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>"# )
139
178
_ = try re16. firstMatch ( in: tainted)
179
+ // BAD - doesn't match comments with the right capture groups
180
+ let ns16 = try NSRegularExpression ( pattern: #"<(?:!--([\S|\s]*?)-->)|([^\/\s>]+)[\S\s]*?>"# )
181
+ _ = ns16. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
140
182
141
183
// BAD - capture groups
142
184
let re17 = try Regex ( #"<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))"# )
143
185
_ = try re17. firstMatch ( in: tainted)
186
+ // BAD - capture groups
187
+ let ns17 = try NSRegularExpression ( pattern: #"<(?:(?:\/([^>]+)>)|(?:!--([\S|\s]*?)-->)|(?:([^\/\s>]+)((?:\s+[\w\-:.]+(?:\s*=\s*?(?:(?:"[^"]*")|(?:'[^']*')|[^\s"'\/>]+))?)*)[\S\s]*?(\/?)>))"# , options: . caseInsensitive)
188
+ _ = ns17. firstMatch ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
144
189
145
190
// BAD - too strict matching on the end tag
146
- let ns1 = try NSRegularExpression ( pattern: #"<script\b[^>]*>([\s\S]*?)<\/script>"# , options: . caseInsensitive)
147
- _ = ns1 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
191
+ let ns2_1 = try NSRegularExpression ( pattern: #"<script\b[^>]*>([\s\S]*?)<\/script>"# , options: . caseInsensitive)
192
+ _ = ns2_1 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
148
193
149
194
// BAD - capture groups
150
- let ns2 = try NSRegularExpression ( pattern: #"(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)"# , options: . caseInsensitive)
151
- _ = ns2 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
195
+ let ns2_2 = try NSRegularExpression ( pattern: #"(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)"# , options: . caseInsensitive)
196
+ _ = ns2_2 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
152
197
153
198
// BAD - capture groups
154
- let ns3 = try NSRegularExpression ( pattern: #"<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))"# )
155
- _ = ns3 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
199
+ let ns2_3 = try NSRegularExpression ( pattern: #"<(?:(?:!--([\w\W]*?)-->)|(?:!\[CDATA\[([\w\W]*?)\]\]>)|(?:!DOCTYPE([\w\W]*?)>)|(?:\?([^\s\/<>]+) ?([\w\W]*?)[?/]>)|(?:\/([A-Za-z][A-Za-z0-9\-_\:\.]*)>)|(?:([A-Za-z][A-Za-z0-9\-_\:\.]*)((?:\s+[^"'>]+(?:(?:"[^"]*")|(?:'[^']*')|[^>]*))*|\/|\s+)>))"# )
200
+ _ = ns2_3 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
156
201
157
202
// BAD - capture groups
158
- let ns4 = try NSRegularExpression ( pattern: #"<!--([\w\W]*?)-->|<([^>]*?)>"# )
159
- _ = ns4 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
203
+ let ns2_4 = try NSRegularExpression ( pattern: #"<!--([\w\W]*?)-->|<([^>]*?)>"# )
204
+ _ = ns2_4 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
160
205
161
206
// GOOD - it's used with the ignorecase flag
162
- let ns5 = try NSRegularExpression ( pattern: #"<script([^>]*)>([\\S\\s]*?)<\/script([^>]*)>"# , options: . caseInsensitive)
163
- _ = ns5 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
207
+ let ns2_5 = try NSRegularExpression ( pattern: #"<script([^>]*)>([\\S\\s]*?)<\/script([^>]*)>"# , options: . caseInsensitive)
208
+ _ = ns2_5 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
164
209
165
210
// BAD - doesn't match --!>
166
- let ns6 = try NSRegularExpression ( pattern: #"-->"# )
167
- _ = ns6 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
211
+ let ns2_6 = try NSRegularExpression ( pattern: #"-->"# )
212
+ _ = ns2_6 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
168
213
169
214
// GOOD
170
- let ns7 = try NSRegularExpression ( pattern: #"^>|^->|<!--|-->|--!>|<!-$"# )
171
- _ = ns7 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
215
+ let ns2_7 = try NSRegularExpression ( pattern: #"^>|^->|<!--|-->|--!>|<!-$"# )
216
+ _ = ns2_7 . matches ( in: tainted, range: NSMakeRange ( 0 , tainted. utf16. count) )
172
217
}
0 commit comments