@@ -174,10 +174,10 @@ extension String {
174
174
175
175
init ( _ _core: _StringCore ) {
176
176
self . _core = _core
177
- self . _endIndex = Index ( _core , _core. endIndex, Index . _emptyBuffer)
177
+ self . _endIndex = Index ( _coreIndex : _core. endIndex, Index . _emptyBuffer)
178
178
if _fastPath ( _core. count != 0 ) {
179
179
let ( _, buffer) = _core. _encodeSomeUTF8 ( from: 0 )
180
- self . _startIndex = Index ( _core , 0 , buffer)
180
+ self . _startIndex = Index ( _coreIndex : 0 , buffer)
181
181
} else {
182
182
self . _startIndex = self . _endIndex
183
183
}
@@ -208,27 +208,42 @@ extension String {
208
208
public struct Index : Comparable {
209
209
internal typealias Buffer = _StringCore . _UTF8Chunk
210
210
211
- init ( _ _core: _StringCore , _ _coreIndex: Int ,
212
- _ _buffer: Buffer ) {
213
- self . _core = _core
211
+ init ( _coreIndex: Int , _ _buffer: Buffer ) {
214
212
self . _coreIndex = _coreIndex
215
213
self . _buffer = _buffer
216
- _sanityCheck ( _coreIndex >= 0 )
217
- _sanityCheck ( _coreIndex <= _core. count)
218
214
}
219
215
220
216
/// True iff the index is at the end of its view or if the next
221
217
/// byte begins a new UnicodeScalar.
222
- internal var _isOnUnicodeScalarBoundary : Bool {
218
+ internal func _isOnUnicodeScalarBoundary( in core : _StringCore ) -> Bool {
223
219
let buffer = UInt32 ( truncatingBitPattern: _buffer)
224
220
let ( codePoint, _) = UTF8 . _decodeOne ( buffer)
225
- return codePoint != nil || _isAtEnd
221
+ return codePoint != nil || _isEndIndex ( of : core )
226
222
}
227
223
228
224
/// True iff the index is at the end of its view
229
- internal var _isAtEnd : Bool {
225
+ internal func _isEndIndex ( of core : _StringCore ) -> Bool {
230
226
return _buffer == Index . _emptyBuffer
231
- && _coreIndex == _core. endIndex
227
+ && _coreIndex == core. endIndex
228
+ }
229
+
230
+ /// The number of UTF-8 code units remaining in the buffer before the
231
+ /// next unicode scalar value is reached. This simulates calling
232
+ /// `index(after: i)` until `i._coreIndex` is incremented, but doesn't
233
+ /// need a `_core` reference.
234
+ internal var _utf8ContinuationBytesUntilNextUnicodeScalar : Int {
235
+ var buffer = _buffer
236
+ var count = 0
237
+
238
+ while true {
239
+ let currentUnit = UTF8 . CodeUnit ( truncatingBitPattern: buffer)
240
+ if currentUnit & 0b1100_0000 != 0b1000_0000 {
241
+ break
242
+ }
243
+ count += 1
244
+ buffer = Index . _nextBuffer ( after: buffer)
245
+ }
246
+ return count
232
247
}
233
248
234
249
/// The value of the buffer when it is empty
@@ -240,20 +255,19 @@ extension String {
240
255
internal static var _bufferHiByte : Buffer {
241
256
return 0xFF << numericCast ( ( sizeof ( Buffer . self) &- 1 ) &* 8 )
242
257
}
243
-
258
+
244
259
/// Consume a byte of the given buffer: shift out the low byte
245
260
/// and put FF in the high byte
246
261
internal static func _nextBuffer( after thisBuffer: Buffer ) -> Buffer {
247
262
return ( thisBuffer >> 8 ) | _bufferHiByte
248
263
}
249
264
250
- /// The underlying buffer we're presenting as UTF-8
251
- internal let _core : _StringCore
252
265
/// The position of `self`, rounded up to the nearest unicode
253
266
/// scalar boundary, in the underlying UTF-16.
254
267
internal let _coreIndex : Int
255
- /// If `self` is at the end of its `_core`, has the value `_endBuffer`.
256
- /// Otherwise, the low byte contains the value of
268
+ /// If `self` is at the end of the underlying `_StringCore`, has the value `_emptyBuffer`.
269
+ /// Otherwise, the low byte contains the value of the UTF-8 code unit
270
+ /// at this position.
257
271
internal let _buffer : Buffer
258
272
}
259
273
@@ -282,31 +296,40 @@ extension String {
282
296
// FIXME: swift-3-indexing-model: range check i?
283
297
let currentUnit = UTF8 . CodeUnit ( truncatingBitPattern: i. _buffer)
284
298
let hiNibble = currentUnit >> 4
285
- // Map the high nibble of the current code unit into the
286
- // amount by which to increment the UTF-16 index. Only when
287
- // the high nibble is 1111 do we have a surrogate pair.
299
+
300
+ // Amounts to increment the UTF-16 index based on the high nibble of a
301
+ // UTF-8 code unit. If the high nibble is:
302
+ //
303
+ // - 0b0000-0b0111: U+0000...U+007F: increment the UTF-16 pointer by 1
304
+ // - 0b1000-0b1011: UTF-8 continuation byte, do not increment
305
+ // the UTF-16 pointer
306
+ // - 0b1100-0b1110: U+0080...U+FFFF: increment the UTF-16 pointer by 1
307
+ // - 0b1111: U+10000...U+10FFFF: increment the UTF-16 pointer by 2
288
308
let u16Increments = Int ( bitPattern:
289
309
// 1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
290
310
0b10___01___01___01___00___00___00___00___01___01___01___01___01___01___01___01 )
311
+
312
+ // Map the high nibble of the current code unit into the
313
+ // amount by which to increment the UTF-16 index.
291
314
let increment = ( u16Increments >> numericCast ( hiNibble << 1 ) ) & 0x3
292
315
let nextCoreIndex = i. _coreIndex &+ increment
293
316
let nextBuffer = Index . _nextBuffer ( after: i. _buffer)
294
317
295
- // if the nextBuffer is nonempty, we have all we need
318
+ // If the nextBuffer is nonempty, we have all we need
296
319
if _fastPath ( nextBuffer != Index . _emptyBuffer) {
297
- return Index ( i . _core , nextCoreIndex, nextBuffer)
320
+ return Index ( _coreIndex : nextCoreIndex, nextBuffer)
298
321
}
299
322
// If the underlying UTF16 isn't exhausted, fill a new buffer
300
- else if _fastPath ( nextCoreIndex < i . _core. endIndex) {
301
- let ( _, freshBuffer) = i . _core. _encodeSomeUTF8 ( from: nextCoreIndex)
302
- return Index ( _core , nextCoreIndex, freshBuffer)
323
+ else if _fastPath ( nextCoreIndex < _core. endIndex) {
324
+ let ( _, freshBuffer) = _core. _encodeSomeUTF8 ( from: nextCoreIndex)
325
+ return Index ( _coreIndex : nextCoreIndex, freshBuffer)
303
326
}
304
327
else {
305
328
// Produce the endIndex
306
329
_precondition (
307
- nextCoreIndex == i . _core. endIndex,
330
+ nextCoreIndex == _core. endIndex,
308
331
" Can't increment past endIndex of String.UTF8View " )
309
- return Index ( _core , nextCoreIndex, nextBuffer)
332
+ return Index ( _coreIndex : nextCoreIndex, nextBuffer)
310
333
}
311
334
}
312
335
@@ -468,17 +491,19 @@ public func < (
468
491
lhs: String . UTF8View . Index ,
469
492
rhs: String . UTF8View . Index
470
493
) -> Bool {
471
- // FIXME: swift-3-indexing-model: tests.
472
- // FIXME: swift-3-indexing-model: this implementation is wrong, it is just a
473
- // temporary HACK.
494
+ if lhs. _coreIndex == rhs. _coreIndex && lhs. _buffer != rhs. _buffer {
495
+ // The index with more continuation bytes remaining before the next unicode scalar is the earlier (lesser) position.
496
+ return lhs. _utf8ContinuationBytesUntilNextUnicodeScalar >
497
+ rhs. _utf8ContinuationBytesUntilNextUnicodeScalar
498
+ }
474
499
return lhs. _coreIndex < rhs. _coreIndex
475
500
}
476
501
477
502
// Index conversions
478
503
extension String . UTF8View . Index {
479
504
internal init ( _ core: _StringCore , _utf16Offset: Int ) {
480
505
let ( _, buffer) = core. _encodeSomeUTF8 ( from: _utf16Offset)
481
- self . init ( core , _utf16Offset, buffer)
506
+ self . init ( _coreIndex : _utf16Offset, buffer)
482
507
}
483
508
484
509
/// Creates an index in the given UTF-8 view that corresponds exactly to the
0 commit comments