Skip to content

Commit 2bf1913

Browse files
authored
Merge pull request #3452 from natecook1000/nc-utf8index-nocore
[stdlib] Remove _StringCore from UTF8View.Index
2 parents 9daf8e3 + b0e20e1 commit 2bf1913

File tree

4 files changed

+65
-32
lines changed

4 files changed

+65
-32
lines changed

stdlib/public/core/StringUTF16.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ extension String.UTF16View.Index {
407407
"Invalid String.UTF8Index for this UTF-16 view")
408408

409409
// Detect positions that have no corresponding index.
410-
if !utf8Index._isOnUnicodeScalarBoundary {
410+
if !utf8Index._isOnUnicodeScalarBoundary(in: core) {
411411
return nil
412412
}
413413
_offset = utf8Index._coreIndex

stdlib/public/core/StringUTF8.swift

Lines changed: 55 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,10 @@ extension String {
174174

175175
init(_ _core: _StringCore) {
176176
self._core = _core
177-
self._endIndex = Index(_core, _core.endIndex, Index._emptyBuffer)
177+
self._endIndex = Index(_coreIndex: _core.endIndex, Index._emptyBuffer)
178178
if _fastPath(_core.count != 0) {
179179
let (_, buffer) = _core._encodeSomeUTF8(from: 0)
180-
self._startIndex = Index(_core, 0, buffer)
180+
self._startIndex = Index(_coreIndex: 0, buffer)
181181
} else {
182182
self._startIndex = self._endIndex
183183
}
@@ -208,27 +208,42 @@ extension String {
208208
public struct Index : Comparable {
209209
internal typealias Buffer = _StringCore._UTF8Chunk
210210

211-
init(_ _core: _StringCore, _ _coreIndex: Int,
212-
_ _buffer: Buffer) {
213-
self._core = _core
211+
init(_coreIndex: Int, _ _buffer: Buffer) {
214212
self._coreIndex = _coreIndex
215213
self._buffer = _buffer
216-
_sanityCheck(_coreIndex >= 0)
217-
_sanityCheck(_coreIndex <= _core.count)
218214
}
219215

220216
/// True iff the index is at the end of its view or if the next
221217
/// byte begins a new UnicodeScalar.
222-
internal var _isOnUnicodeScalarBoundary: Bool {
218+
internal func _isOnUnicodeScalarBoundary(in core: _StringCore) -> Bool {
223219
let buffer = UInt32(truncatingBitPattern: _buffer)
224220
let (codePoint, _) = UTF8._decodeOne(buffer)
225-
return codePoint != nil || _isAtEnd
221+
return codePoint != nil || _isEndIndex(of: core)
226222
}
227223

228224
/// True iff the index is at the end of its view
229-
internal var _isAtEnd: Bool {
225+
internal func _isEndIndex(of core: _StringCore) -> Bool {
230226
return _buffer == Index._emptyBuffer
231-
&& _coreIndex == _core.endIndex
227+
&& _coreIndex == core.endIndex
228+
}
229+
230+
/// The number of UTF-8 code units remaining in the buffer before the
231+
/// next unicode scalar value is reached. This simulates calling
232+
/// `index(after: i)` until `i._coreIndex` is incremented, but doesn't
233+
/// need a `_core` reference.
234+
internal var _utf8ContinuationBytesUntilNextUnicodeScalar: Int {
235+
var buffer = _buffer
236+
var count = 0
237+
238+
while true {
239+
let currentUnit = UTF8.CodeUnit(truncatingBitPattern: buffer)
240+
if currentUnit & 0b1100_0000 != 0b1000_0000 {
241+
break
242+
}
243+
count += 1
244+
buffer = Index._nextBuffer(after: buffer)
245+
}
246+
return count
232247
}
233248

234249
/// The value of the buffer when it is empty
@@ -240,20 +255,19 @@ extension String {
240255
internal static var _bufferHiByte: Buffer {
241256
return 0xFF << numericCast((sizeof(Buffer.self) &- 1) &* 8)
242257
}
243-
258+
244259
/// Consume a byte of the given buffer: shift out the low byte
245260
/// and put FF in the high byte
246261
internal static func _nextBuffer(after thisBuffer: Buffer) -> Buffer {
247262
return (thisBuffer >> 8) | _bufferHiByte
248263
}
249264

250-
/// The underlying buffer we're presenting as UTF-8
251-
internal let _core: _StringCore
252265
/// The position of `self`, rounded up to the nearest unicode
253266
/// scalar boundary, in the underlying UTF-16.
254267
internal let _coreIndex: Int
255-
/// If `self` is at the end of its `_core`, has the value `_endBuffer`.
256-
/// Otherwise, the low byte contains the value of
268+
/// If `self` is at the end of its `_core`, has the value `_emptyBuffer`.
269+
/// Otherwise, the low byte contains the value of the UTF-8 code unit
270+
/// at this position.
257271
internal let _buffer: Buffer
258272
}
259273

@@ -282,31 +296,40 @@ extension String {
282296
// FIXME: swift-3-indexing-model: range check i?
283297
let currentUnit = UTF8.CodeUnit(truncatingBitPattern: i._buffer)
284298
let hiNibble = currentUnit >> 4
285-
// Map the high nibble of the current code unit into the
286-
// amount by which to increment the UTF-16 index. Only when
287-
// the high nibble is 1111 do we have a surrogate pair.
299+
300+
// Amounts to increment the UTF-16 index based on the high nibble of a
301+
// UTF-8 code unit. If the high nibble is:
302+
//
303+
// - 0b0000-0b0111: U+0000...U+007F: increment the UTF-16 pointer by 1
304+
// - 0b1000-0b1011: UTF-8 continuation byte, do not increment
305+
// the UTF-16 pointer
306+
// - 0b1100-0b1110: U+0080...U+FFFF: increment the UTF-16 pointer by 1
307+
// - 0b1111: U+10000...U+10FFFF: increment the UTF-16 pointer by 2
288308
let u16Increments = Int(bitPattern:
289309
// 1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
290310
0b10___01___01___01___00___00___00___00___01___01___01___01___01___01___01___01)
311+
312+
// Map the high nibble of the current code unit into the
313+
// amount by which to increment the UTF-16 index.
291314
let increment = (u16Increments >> numericCast(hiNibble << 1)) & 0x3
292315
let nextCoreIndex = i._coreIndex &+ increment
293316
let nextBuffer = Index._nextBuffer(after: i._buffer)
294317

295-
// if the nextBuffer is nonempty, we have all we need
318+
// If the nextBuffer is nonempty, we have all we need
296319
if _fastPath(nextBuffer != Index._emptyBuffer) {
297-
return Index(i._core, nextCoreIndex, nextBuffer)
320+
return Index(_coreIndex: nextCoreIndex, nextBuffer)
298321
}
299322
// If the underlying UTF16 isn't exhausted, fill a new buffer
300-
else if _fastPath(nextCoreIndex < i._core.endIndex) {
301-
let (_, freshBuffer) = i._core._encodeSomeUTF8(from: nextCoreIndex)
302-
return Index(_core, nextCoreIndex, freshBuffer)
323+
else if _fastPath(nextCoreIndex < _core.endIndex) {
324+
let (_, freshBuffer) = _core._encodeSomeUTF8(from: nextCoreIndex)
325+
return Index(_coreIndex: nextCoreIndex, freshBuffer)
303326
}
304327
else {
305328
// Produce the endIndex
306329
_precondition(
307-
nextCoreIndex == i._core.endIndex,
330+
nextCoreIndex == _core.endIndex,
308331
"Can't increment past endIndex of String.UTF8View")
309-
return Index(_core, nextCoreIndex, nextBuffer)
332+
return Index(_coreIndex: nextCoreIndex, nextBuffer)
310333
}
311334
}
312335

@@ -468,17 +491,19 @@ public func < (
468491
lhs: String.UTF8View.Index,
469492
rhs: String.UTF8View.Index
470493
) -> Bool {
471-
// FIXME: swift-3-indexing-model: tests.
472-
// FIXME: swift-3-indexing-model: this implementation is wrong, it is just a
473-
// temporary HACK.
494+
if lhs._coreIndex == rhs._coreIndex && lhs._buffer != rhs._buffer {
495+
// The index with more continuation bytes remaining before the next unicode scalar is the earlier position, so it compares as less.
496+
return lhs._utf8ContinuationBytesUntilNextUnicodeScalar >
497+
rhs._utf8ContinuationBytesUntilNextUnicodeScalar
498+
}
474499
return lhs._coreIndex < rhs._coreIndex
475500
}
476501

477502
// Index conversions
478503
extension String.UTF8View.Index {
479504
internal init(_ core: _StringCore, _utf16Offset: Int) {
480505
let (_, buffer) = core._encodeSomeUTF8(from: _utf16Offset)
481-
self.init(core, _utf16Offset, buffer)
506+
self.init(_coreIndex: _utf16Offset, buffer)
482507
}
483508

484509
/// Creates an index in the given UTF-8 view that corresponds exactly to the

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ extension String.UnicodeScalarIndex {
471471
"Invalid String.UTF8Index for this UnicodeScalar view")
472472

473473
// Detect positions that have no corresponding index.
474-
if !utf8Index._isOnUnicodeScalarBoundary {
474+
if !utf8Index._isOnUnicodeScalarBoundary(in: core) {
475475
return nil
476476
}
477477
self.init(_position: utf8Index._coreIndex)

validation-test/stdlib/StringViews.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,14 @@ tests.test("UTF8 indexes") {
687687
}
688688
}
689689

690+
tests.test("index/Comparable")
691+
.forEach(in: [summer, winter]) { str in
692+
checkComparable(str.characters.indices, oracle: <=>)
693+
checkComparable(str.unicodeScalars.indices, oracle: <=>)
694+
checkComparable(str.utf16.indices, oracle: <=>)
695+
checkComparable(str.utf8.indices, oracle: <=>)
696+
}
697+
690698
tests.test("UTF16->String") {
691699
let s = summer + winter + winter + summer
692700
let v = s.utf16

0 commit comments

Comments
 (0)