@@ -219,22 +219,7 @@ fileprivate extension Compiler.ByteCodeGen {
219
219
return
220
220
}
221
221
222
- // if s.value < 0x300 {
223
- // // lily todo: make sure this is correct + add compiler option check after it's merged in
224
- //
225
- // // we unconditionally match against the scalar using consumeScalar in the else case
226
- // // so maybe this check is uneccessary??
227
- // // I thought having it be < 0x300 made sure we didn't have to worry about any combining stuff
228
- // // but in the else case we just unconditionally consume and check the value
229
- // // i think this is all redundant
230
- // builder.buildMatchScalar(s, boundaryCheck: false)
231
- // return
232
- // }
233
- //
234
- // builder.buildConsume(by: consumeScalar {
235
- // $0 == s
236
- // })
237
- if optimizationsEnabled {
222
+ if optimizationsEnabled { // lily note: should we just do this unconditionally?
238
223
builder. buildMatchScalar ( s, boundaryCheck: false )
239
224
} else {
240
225
builder. buildConsume ( by: consumeScalar {
@@ -263,21 +248,11 @@ fileprivate extension Compiler.ByteCodeGen {
263
248
}
264
249
}
265
250
266
- // if c.unicodeScalars.count == 1,
267
- // let first = c.unicodeScalars.first,
268
- // first.value < 0x300 { // lily todo: check this more carefully
269
- // if we have a single scalar then this must not be an extended grapheme cluster
270
- // so it must be a character that can be exactly matched by its first scalar
271
- // cr-lf has two scalars right? yes it has two
272
-
273
- // i think one these two checks are redundant, I think we only need the second?
274
- // ask alex?
275
-
276
- // we can only match against characters that have a single cannonical equivalence
277
- // so I think that rules out any latin in here, so just use ascii for now
278
- // we also need to exclude our good non-single-scalar-ascii friend cr-lf
279
- if optimizationsEnabled && c. isASCII && c != " \r \n " {
280
- builder. buildMatchScalar ( c. unicodeScalars. first!, boundaryCheck: true )
251
+ if optimizationsEnabled && c. isASCII {
252
+ for scalar in c. unicodeScalars {
253
+ let boundaryCheck = scalar == c. unicodeScalars. last!
254
+ builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck)
255
+ }
281
256
return
282
257
}
283
258
@@ -786,29 +761,14 @@ fileprivate extension Compiler.ByteCodeGen {
786
761
return currentIndex
787
762
}
788
763
} else {
789
- // if we have any extended latin in our characters then we have to
790
- // respect cannoical equivalence, so we cannot match against scalars exactly
791
- // so match against all single scalar ascii
792
-
793
- // lily todo: which strings are nfc invariant and matchable by direct scalar comparison?
794
- // alternatively: loop over characters in s and emit either matchScalar or matchCharacter depending on if it is NFC invariant
795
- // getting rid of matchSeq entirely does also get rid of the weird ARC
796
- if optimizationsEnabled {
797
- for c in s {
798
- // Each character needs to be NFC invariant in order for us to match it directly by scalar value in grapheme cluster mode
799
- // lily temp: use isASCII for now, ask alex what exactly this check should be
800
- if c. isASCII && c != " \r \n " {
801
- builder. buildMatchScalar ( c. unicodeScalars. first!, boundaryCheck: false )
802
- } else {
803
- // let's think about this carefully
804
- // what if our quoted literal is an ascii character + combining accent
805
- // what are the characters in the loop?
806
-
807
- // I believe that if we ever have ascii + combining character in our input
808
- // string will automatically combine them into a unified character, so itll fall into this case
809
-
810
- // so I don't think we ever need that boundaryCheck to be enabled, except at the end of this sequence
811
- builder. buildMatch ( c)
764
+ if optimizationsEnabled && s. allSatisfy ( { char in char. isASCII} ) {
765
+ for char in s {
766
+ // Note: CR-LF ("\r\n") is the only ASCII character that consists of multiple scalars
767
+ for scalar in char. unicodeScalars {
768
+ // Only boundary check if we are the last scalar in the last character
769
+ // to make sure that there isn't a combining scalar after the quoted literal
770
+ let boundaryCheck = char == s. last! && scalar == char. unicodeScalars. last!
771
+ builder. buildMatchScalar ( scalar, boundaryCheck: boundaryCheck)
812
772
}
813
773
}
814
774
} else {
0 commit comments