@@ -227,6 +227,25 @@ extension String {
227
227
&& _coreIndex == core. endIndex
228
228
}
229
229
230
+ /// The number of UTF-8 code units remaining in the buffer before the
231
+ /// next Unicode scalar value is reached. This simulates calling
232
+ /// `index(after: i)` until `i._coreIndex` is incremented, but doesn't
233
+ /// need a `_core` reference.
234
+ internal var _utf8ContinuationBytesUntilNextUnicodeScalar : Int {
235
+ var buffer = _buffer
236
+ var count = 0
237
+
238
+ while true {
239
+ let currentUnit = UTF8 . CodeUnit ( truncatingBitPattern: buffer)
240
+ if currentUnit & 0b1100_0000 != 0b1000_0000 {
241
+ break
242
+ }
243
+ count += 1
244
+ buffer = Index . _nextBuffer ( after: buffer)
245
+ }
246
+ return count
247
+ }
248
+
230
249
/// The value of the buffer when it is empty
231
250
internal static var _emptyBuffer : Buffer {
232
251
return ~ 0
@@ -236,7 +255,7 @@ extension String {
236
255
internal static var _bufferHiByte : Buffer {
237
256
return 0xFF << numericCast ( ( sizeof ( Buffer . self) &- 1 ) &* 8 )
238
257
}
239
-
258
+
240
259
/// Consume a byte of the given buffer: shift out the low byte
241
260
/// and put FF in the high byte
242
261
internal static func _nextBuffer( after thisBuffer: Buffer ) -> Buffer {
@@ -277,12 +296,21 @@ extension String {
277
296
// FIXME: swift-3-indexing-model: range check i?
278
297
let currentUnit = UTF8 . CodeUnit ( truncatingBitPattern: i. _buffer)
279
298
let hiNibble = currentUnit >> 4
280
- // Map the high nibble of the current code unit into the
281
- // amount by which to increment the UTF-16 index. Only when
282
- // the high nibble is 1111 do we have a surrogate pair.
299
+
300
+ // Amounts to increment the UTF-16 index based on the high nibble of a
301
+ // UTF-8 code unit. If the high nibble is:
302
+ //
303
+ // - 0b0000-0b0111: U+0000...U+007F: increment the UTF-16 pointer by 1
304
+ // - 0b1000-0b1011: UTF-8 continuation byte, do not increment
305
+ // the UTF-16 pointer
306
+ // - 0b1100-0b1110: U+0080...U+FFFF: increment the UTF-16 pointer by 1
307
+ // - 0b1111: U+10000...U+10FFFF: increment the UTF-16 pointer by 2
283
308
let u16Increments = Int ( bitPattern:
284
309
// 1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
285
310
0b10___01___01___01___00___00___00___00___01___01___01___01___01___01___01___01 )
311
+
312
+ // Map the high nibble of the current code unit into the
313
+ // amount by which to increment the UTF-16 index.
286
314
let increment = ( u16Increments >> numericCast ( hiNibble << 1 ) ) & 0x3
287
315
let nextCoreIndex = i. _coreIndex &+ increment
288
316
let nextBuffer = Index . _nextBuffer ( after: i. _buffer)
@@ -463,9 +491,11 @@ public func < (
463
491
lhs: String . UTF8View . Index ,
464
492
rhs: String . UTF8View . Index
465
493
) -> Bool {
466
- // FIXME: swift-3-indexing-model: tests.
467
- // FIXME: swift-3-indexing-model: this implementation is wrong, it is just a
468
- // temporary HACK.
494
+ if lhs. _coreIndex == rhs. _coreIndex && lhs. _buffer != rhs. _buffer {
495
+ // The index with more continuation bytes remaining before the next Unicode scalar is the lesser index.
496
+ return lhs. _utf8ContinuationBytesUntilNextUnicodeScalar >
497
+ rhs. _utf8ContinuationBytesUntilNextUnicodeScalar
498
+ }
469
499
return lhs. _coreIndex < rhs. _coreIndex
470
500
}
471
501
0 commit comments