@@ -110,6 +110,116 @@ class RegexDSLTests: XCTestCase {
110
110
CharacterClass . whitespace. inverted
111
111
}
112
112
}
113
+
114
+ let allNewlines = " \u{A} \u{B} \u{C} \u{D} \r \n \u{85} \u{2028} \u{2029} "
115
+ let asciiNewlines = " \u{A} \u{B} \u{C} \u{D} \r \n "
116
+
117
+ // `.newlineSequence` and `.verticalWhitespace` match the same set of
118
+ // newlines in grapheme semantic mode, and in scalar mode when applied with
119
+ // OneOrMore.
120
+ for cc in [ CharacterClass . newlineSequence, . verticalWhitespace] {
121
+ for mode in [ RegexSemanticLevel . unicodeScalar, . graphemeCluster] {
122
+ try _testDSLCaptures (
123
+ ( " \n " , ( " \n " , " \n " ) ) ,
124
+ ( " \r " , ( " \r " , " \r " ) ) ,
125
+ ( " \r \n " , ( " \r \n " , " \r \n " ) ) ,
126
+ ( allNewlines, ( allNewlines [ ... ] , allNewlines [ ... ] ) ) ,
127
+ ( " abc \n def " , ( " abc \n def " , " \n " ) ) ,
128
+ ( " abc \n \r \n def " , ( " abc \n \r \n def " , " \n \r \n " ) ) ,
129
+ ( " abc \( allNewlines) def " , ( " abc \( allNewlines) def " , allNewlines [ ... ] ) ) ,
130
+ ( " abc " , nil ) ,
131
+ matchType: ( Substring, Substring) . self, == )
132
+ {
133
+ Regex {
134
+ ZeroOrMore {
135
+ cc. inverted
136
+ }
137
+ Capture {
138
+ OneOrMore ( cc)
139
+ }
140
+ ZeroOrMore {
141
+ cc. inverted
142
+ }
143
+ } . matchingSemantics ( mode)
144
+ }
145
+
146
+ // Try with ASCII-only whitespace.
147
+ try _testDSLCaptures (
148
+ ( " \n " , ( " \n " , " \n " ) ) ,
149
+ ( " \r " , ( " \r " , " \r " ) ) ,
150
+ ( " \r \n " , ( " \r \n " , " \r \n " ) ) ,
151
+ ( allNewlines, ( allNewlines [ ... ] , asciiNewlines [ ... ] ) ) ,
152
+ ( " abc \n def " , ( " abc \n def " , " \n " ) ) ,
153
+ ( " abc \n \r \n def " , ( " abc \n \r \n def " , " \n \r \n " ) ) ,
154
+ ( " abc \( allNewlines) def " , ( " abc \( allNewlines) def " , asciiNewlines [ ... ] ) ) ,
155
+ ( " abc " , nil ) ,
156
+ matchType: ( Substring, Substring) . self, == )
157
+ {
158
+ Regex {
159
+ ZeroOrMore {
160
+ cc. inverted
161
+ }
162
+ Capture {
163
+ OneOrMore ( cc)
164
+ }
165
+ ZeroOrMore {
166
+ cc. inverted
167
+ }
168
+ } . matchingSemantics ( mode) . asciiOnlyWhitespace ( )
169
+ }
170
+ }
171
+ }
172
+
173
+ // In scalar mode, a single `.newlineSequence` can match the whole `\r\n` pair.
174
+ // A single `.verticalWhitespace` cannot.
175
+ for asciiOnly in [ true , false ] {
176
+ try _testDSLCaptures (
177
+ ( " \r " , " \r " ) ,
178
+ ( " \r \n " , " \r \n " ) ,
179
+ matchType: Substring . self, == )
180
+ {
181
+ Regex {
182
+ CharacterClass . newlineSequence
183
+ } . matchingSemantics ( . unicodeScalar) . asciiOnlyWhitespace ( asciiOnly)
184
+ }
185
+ try _testDSLCaptures (
186
+ ( " \r " , nil ) ,
187
+ ( " \r \n " , nil ) ,
188
+ matchType: Substring . self, == )
189
+ {
190
+ Regex {
191
+ CharacterClass . newlineSequence. inverted
192
+ } . matchingSemantics ( . unicodeScalar) . asciiOnlyWhitespace ( asciiOnly)
193
+ }
194
+ try _testDSLCaptures (
195
+ ( " \r " , " \r " ) ,
196
+ ( " \r \n " , nil ) ,
197
+ matchType: Substring . self, == )
198
+ {
199
+ Regex {
200
+ CharacterClass . verticalWhitespace
201
+ } . matchingSemantics ( . unicodeScalar) . asciiOnlyWhitespace ( asciiOnly)
202
+ }
203
+ try _testDSLCaptures (
204
+ ( " \r " , nil ) ,
205
+ ( " \r \n " , nil ) ,
206
+ matchType: Substring . self, == )
207
+ {
208
+ Regex {
209
+ CharacterClass . verticalWhitespace. inverted
210
+ } . matchingSemantics ( . unicodeScalar) . asciiOnlyWhitespace ( asciiOnly)
211
+ }
212
+ try _testDSLCaptures (
213
+ ( " \r " , nil ) ,
214
+ ( " \r \n " , nil ) ,
215
+ matchType: Substring . self, == )
216
+ {
217
+ Regex {
218
+ CharacterClass . verticalWhitespace. inverted
219
+ " \n "
220
+ } . matchingSemantics ( . unicodeScalar) . asciiOnlyWhitespace ( asciiOnly)
221
+ }
222
+ }
113
223
}
114
224
115
225
func testCharacterClassOperations( ) throws {
0 commit comments