@@ -74,75 +74,81 @@ fileprivate extension Compiler.ByteCodeGen {
74
74
}
75
75
}
76
76
77
- mutating func emitQuotedLiteral( _ s: String ) {
78
- if options. semanticLevel == . graphemeCluster {
79
- if options. isCaseInsensitive {
80
- // future work: if all ascii, emit matchBitset instructions with
81
- // case insensitive bitsets
82
-
83
- // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
84
- builder. buildConsume { input, bounds in
85
- var iterator = s. makeIterator ( )
86
- var currentIndex = bounds. lowerBound
87
- while let ch = iterator. next ( ) {
88
- guard currentIndex < bounds. upperBound,
89
- ch. lowercased ( ) == input [ currentIndex] . lowercased ( )
90
- else { return nil }
91
- input. formIndex ( after: & currentIndex)
92
- }
93
- return currentIndex
77
/// Emits matching instructions for a quoted literal under unicode-scalar
/// semantics.
///
/// - Parameter s: The literal contents, compared one unicode scalar at a time.
/// - Precondition: `options.semanticLevel` is `.unicodeScalar`.
mutating func emitScalarQuotedLiteral(_ s: String) {
  precondition(options.semanticLevel == .unicodeScalar)

  // Copied into a local so the consume closure below does not need `self`.
  let ignoresCase = options.isCaseInsensitive

  guard !optimizationsEnabled || ignoresCase else {
    // Exact match of every scalar. Boundary checks are never requested
    // because we are in unicode scalars mode.
    for scalar in s.unicodeScalars {
      builder.buildMatchScalar(scalar, boundaryCheck: false)
    }
    return
  }

  // General path: walk the literal and the input in lockstep, returning the
  // index just past the match, or nil on the first mismatch or when the
  // bounds run out.
  builder.buildConsume { input, bounds in
    // TODO: Case folding
    var position = bounds.lowerBound
    for expected in s.unicodeScalars {
      guard position < bounds.upperBound else { return nil }
      let actual = input.unicodeScalars[position]
      let matches = ignoresCase
        ? expected.properties.lowercaseMapping == actual.properties.lowercaseMapping
        : expected == actual
      guard matches else { return nil }
      input.unicodeScalars.formIndex(after: &position)
    }
    return position
  }
}
111
+
112
/// Emits matching instructions for a quoted literal.
///
/// When the semantic level is anything other than `.graphemeCluster`, the
/// work is forwarded to `emitScalarQuotedLiteral(_:)`. Otherwise one of three
/// strategies is used: a character-wise lowercased comparison when matching
/// is case-insensitive, per-scalar match instructions for a non-empty
/// all-ASCII literal when optimizations are enabled, or a general sequence
/// match.
///
/// - Parameter s: The literal contents to match.
mutating func emitQuotedLiteral(_ s: String) {
  guard options.semanticLevel == .graphemeCluster else {
    emitScalarQuotedLiteral(s)
    return
  }

  if options.isCaseInsensitive {
    // future work: if all ascii, emit matchBitset instructions with
    // case insensitive bitsets
    // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
    builder.buildConsume { input, bounds in
      var currentIndex = bounds.lowerBound
      for ch in s {
        guard currentIndex < bounds.upperBound,
              ch.lowercased() == input[currentIndex].lowercased()
        else { return nil }
        input.formIndex(after: &currentIndex)
      }
      return currentIndex
    }
    return
  }

  // `allSatisfy` is vacuously true for an empty literal, so the last scalar
  // index is bound optionally rather than force-unwrapped. An empty literal
  // falls through to the general sequence match below, the same path taken
  // when optimizations are disabled.
  if optimizationsEnabled,
     s.allSatisfy({ $0.isASCII }),
     let lastIdx = s.unicodeScalars.indices.last
  {
    for idx in s.unicodeScalars.indices {
      // Only boundary check on the last scalar of the last character, to
      // make sure that there isn't a combining scalar after the quoted
      // literal.
      builder.buildMatchScalar(
        s.unicodeScalars[idx], boundaryCheck: idx == lastIdx)
    }
    return
  }

  builder.buildMatchSequence(s)
}
147
153
148
154
mutating func emitBackreference(
@@ -281,7 +287,6 @@ fileprivate extension Compiler.ByteCodeGen {
281
287
}
282
288
283
289
mutating func emitScalar( _ s: UnicodeScalar ) throws {
284
- // TODO: Native instruction buildMatchScalar(s)
285
290
if options. isCaseInsensitive {
286
291
// TODO: e.g. buildCaseInsensitiveMatchScalar(s)
287
292
builder. buildConsume ( by: consumeScalar {
@@ -290,17 +295,11 @@ fileprivate extension Compiler.ByteCodeGen {
290
295
return
291
296
}
292
297
293
- if optimizationsEnabled { // should we just do this unconditionally?
294
- builder. buildMatchScalar ( s, boundaryCheck: false )
295
- } else {
296
- builder. buildConsume ( by: consumeScalar {
297
- $0 == s
298
- } )
299
- }
298
+ builder. buildMatchScalar ( s, boundaryCheck: false )
300
299
}
301
300
302
301
mutating func emitCharacter( _ c: Character ) throws {
303
- // Unicode scalar matches the specific scalars that comprise a character
302
+ // Unicode scalar mode matches the specific scalars that comprise a character
304
303
if options. semanticLevel == . unicodeScalar {
305
304
for scalar in c. unicodeScalars {
306
305
try emitScalar ( scalar)
@@ -317,12 +316,13 @@ fileprivate extension Compiler.ByteCodeGen {
317
316
? input. index ( after: bounds. lowerBound)
318
317
: nil
319
318
}
319
+ return
320
320
}
321
321
322
322
if optimizationsEnabled && c. isASCII {
323
- for scalar in c. unicodeScalars {
324
- let boundaryCheck = scalar == c. unicodeScalars. last!
325
- builder. buildMatchScalar ( scalar , boundaryCheck: boundaryCheck )
323
+ let lastIdx = c. unicodeScalars. indices . last!
324
+ for idx in c. unicodeScalars. indices {
325
+ builder. buildMatchScalar ( c . unicodeScalars [ idx ] , boundaryCheck: idx == lastIdx )
326
326
}
327
327
return
328
328
}
0 commit comments