@@ -45,10 +45,10 @@ fileprivate extension Compiler.ByteCodeGen {
45
45
emitAny ( )
46
46
47
47
case let . char( c) :
48
- try emitCharacter ( c)
48
+ emitCharacter ( c)
49
49
50
50
case let . scalar( s) :
51
- try emitScalar ( s)
51
+ emitScalar ( s)
52
52
53
53
case let . assertion( kind) :
54
54
try emitAssertion ( kind. ast)
@@ -74,81 +74,32 @@ fileprivate extension Compiler.ByteCodeGen {
74
74
}
75
75
}
76
76
77
- mutating func emitScalarQuotedLiteral( _ s: String ) {
78
- precondition ( options. semanticLevel == . unicodeScalar)
79
- if optimizationsEnabled && !options. isCaseInsensitive {
80
- // Match all scalars exactly, never boundary check because we're in
81
- // unicode scalars mode
82
- for char in s {
83
- for scalar in char. unicodeScalars {
84
- builder. buildMatchScalar ( scalar, boundaryCheck: false )
85
- }
86
- }
87
- return
88
- }
89
-
90
- builder. buildConsume {
91
- [ caseInsensitive = options. isCaseInsensitive] input, bounds in
92
- // TODO: Case folding
93
- var iterator = s. unicodeScalars. makeIterator ( )
94
- var currentIndex = bounds. lowerBound
95
- while let scalar = iterator. next ( ) {
96
- guard currentIndex < bounds. upperBound else { return nil }
97
- if caseInsensitive {
98
- if scalar. properties. lowercaseMapping != input. unicodeScalars [ currentIndex] . properties. lowercaseMapping {
99
- return nil
100
- }
101
- } else {
102
- if scalar != input. unicodeScalars [ currentIndex] {
103
- return nil
104
- }
105
- }
106
- input. unicodeScalars. formIndex ( after: & currentIndex)
107
- }
108
- return currentIndex
109
- }
110
- }
111
-
112
77
/// Emits matching instructions for the quoted literal `s`.
///
/// - In any semantic level other than grapheme-cluster, every Unicode scalar
///   of `s` is emitted individually through `emitScalar`.
/// - In grapheme-cluster mode, when optimizations are enabled and every
///   character of `s` is ASCII, one scalar match is emitted per scalar and
///   only the final scalar is boundary checked.
/// - Otherwise each character is emitted through `emitCharacter`.
///
/// An empty literal emits no instructions on any path.
///
/// - Parameter s: The literal text to match.
mutating func emitQuotedLiteral(_ s: String) {
  guard options.semanticLevel == .graphemeCluster else {
    // Walking `s.unicodeScalars` visits the same scalars, in the same order,
    // as walking each character's scalars in turn.
    for scalar in s.unicodeScalars {
      emitScalar(scalar)
    }
    return
  }

  // Fast path for eliding boundary checks for an all-ASCII quoted literal.
  if optimizationsEnabled && s.allSatisfy({ $0.isASCII }) {
    let scalars = s.unicodeScalars
    // `allSatisfy` is vacuously true for an empty string, so the last index
    // may not exist; bind it safely instead of force-unwrapping.
    guard let lastIdx = scalars.indices.last else { return }

    for idx in scalars.indices {
      // Only boundary check the last scalar of the last character, to make
      // sure that there isn't a combining scalar after the quoted literal.
      let boundaryCheck = idx == lastIdx
      let scalar = scalars[idx]
      if options.isCaseInsensitive && scalar.properties.isCased {
        builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
      } else {
        builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
      }
    }
    return
  }

  for c in s { emitCharacter(c) }
}
153
104
154
105
mutating func emitBackreference(
@@ -286,35 +237,29 @@ fileprivate extension Compiler.ByteCodeGen {
286
237
}
287
238
}
288
239
289
/// Emits a single-scalar match for `s`, never requesting a boundary check.
///
/// The case-insensitive scalar match is used only when the current options
/// are case insensitive and `s` is a cased scalar; every other scalar gets
/// the plain scalar match.
///
/// - Parameter s: The Unicode scalar to match.
mutating func emitScalar(_ s: UnicodeScalar) {
  let matchesIgnoringCase = options.isCaseInsensitive && s.properties.isCased

  guard matchesIgnoringCase else {
    builder.buildMatchScalar(s, boundaryCheck: false)
    return
  }

  builder.buildMatchScalarCaseInsensitive(s, boundaryCheck: false)
}
300
247
301
- mutating func emitCharacter( _ c: Character ) throws {
248
+ mutating func emitCharacter( _ c: Character ) {
302
249
// Unicode scalar mode matches the specific scalars that comprise a character
303
250
if options. semanticLevel == . unicodeScalar {
304
251
for scalar in c. unicodeScalars {
305
- try emitScalar ( scalar)
252
+ emitScalar ( scalar)
306
253
}
307
254
return
308
255
}
309
256
310
257
if options. isCaseInsensitive && c. isCased {
311
- // TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true)
312
- builder. buildConsume { input, bounds in
313
- let inputChar = input [ bounds. lowerBound] . lowercased ( )
314
- let matchChar = c. lowercased ( )
315
- return inputChar == matchChar
316
- ? input. index ( after: bounds. lowerBound)
317
- : nil
258
+ if optimizationsEnabled && c. isASCII {
259
+ // c.isCased ensures that c is not CR-LF, so we know that c is a single scalar
260
+ builder. buildMatchScalarCaseInsensitive ( c. unicodeScalars. last!, boundaryCheck: true )
261
+ } else {
262
+ builder. buildMatchCaseInsensitive ( c)
318
263
}
319
264
return
320
265
}
0 commit comments