Skip to content

Commit ce94bd9

Browse files
authored
Merge pull request #3268 from natecook1000/nc-scalarindex-nocore
[stdlib] Remove _StringCore from UnicodeScalarIndex
2 parents c8b8838 + 202f84e commit ce94bd9

File tree

5 files changed

+131
-152
lines changed

5 files changed

+131
-152
lines changed

stdlib/public/SDK/Foundation/NSStringAPI.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@ extension String {
9696
/// Return an `Index` corresponding to the given offset in our UTF-16
9797
/// representation.
9898
func _index(_ utf16Index: Int) -> Index {
99-
return Index(_base: String.UnicodeScalarView.Index(utf16Index, _core))
99+
return Index(
100+
_base: String.UnicodeScalarView.Index(_position: utf16Index),
101+
in: characters
102+
)
100103
}
101104

102105
/// Return a `Range<Index>` corresponding to the given `NSRange` of

stdlib/public/core/StringCharacterView.swift

Lines changed: 97 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -156,10 +156,9 @@ extension String.CharacterView : BidirectionalCollection {
156156
/// // Prints "[72, 101, 97, 114, 116, 115]"
157157
public struct Index : Comparable, CustomPlaygroundQuickLookable {
158158
public // SPI(Foundation)
159-
init(_base: String.UnicodeScalarView.Index) {
159+
init(_base: String.UnicodeScalarView.Index, in c: String.CharacterView) {
160160
self._base = _base
161-
self._countUTF16 =
162-
Index._measureExtendedGraphemeClusterForward(from: _base)
161+
self._countUTF16 = c._measureExtendedGraphemeClusterForward(from: _base)
163162
}
164163

165164
internal init(_base: UnicodeScalarView.Index, _countUTF16: Int) {
@@ -181,88 +180,7 @@ extension String.CharacterView : BidirectionalCollection {
181180
/// The one past end index for this extended grapheme cluster in Unicode
182181
/// scalars.
183182
internal var _endBase: UnicodeScalarView.Index {
184-
return UnicodeScalarView.Index(
185-
_utf16Index + _countUTF16, _base._core)
186-
}
187-
188-
/// Returns the length of the first extended grapheme cluster in UTF-16
189-
/// code units.
190-
@inline(never)
191-
internal static func _measureExtendedGraphemeClusterForward(
192-
from start: UnicodeScalarView.Index
193-
) -> Int {
194-
var start = start
195-
let end = start._viewEndIndex
196-
if start == end {
197-
return 0
198-
}
199-
200-
let startIndexUTF16 = start._position
201-
let unicodeScalars = UnicodeScalarView(start._core)
202-
let graphemeClusterBreakProperty =
203-
_UnicodeGraphemeClusterBreakPropertyTrie()
204-
let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
205-
206-
var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
207-
unicodeScalars[start].value)
208-
unicodeScalars.formIndex(after: &start)
209-
210-
while start != end {
211-
// FIXME(performance): consider removing this "fast path". A branch
212-
// that is hard to predict could be worse for performance than a few
213-
// loads from cache to fetch the property 'gcb1'.
214-
if segmenter.isBoundaryAfter(gcb0) {
215-
break
216-
}
217-
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
218-
unicodeScalars[start].value)
219-
if segmenter.isBoundary(gcb0, gcb1) {
220-
break
221-
}
222-
gcb0 = gcb1
223-
unicodeScalars.formIndex(after: &start)
224-
}
225-
226-
return start._position - startIndexUTF16
227-
}
228-
229-
/// Returns the length of the previous extended grapheme cluster in UTF-16
230-
/// code units.
231-
@inline(never)
232-
internal static func _measureExtendedGraphemeClusterBackward(
233-
from end: UnicodeScalarView.Index
234-
) -> Int {
235-
let start = end._viewStartIndex
236-
if start == end {
237-
return 0
238-
}
239-
240-
let endIndexUTF16 = end._position
241-
let unicodeScalars = UnicodeScalarView(start._core)
242-
let graphemeClusterBreakProperty =
243-
_UnicodeGraphemeClusterBreakPropertyTrie()
244-
let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
245-
246-
var graphemeClusterStart = end
247-
248-
unicodeScalars.formIndex(before: &graphemeClusterStart)
249-
var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
250-
unicodeScalars[graphemeClusterStart].value)
251-
252-
var graphemeClusterStartUTF16 = graphemeClusterStart._position
253-
254-
while graphemeClusterStart != start {
255-
unicodeScalars.formIndex(before: &graphemeClusterStart)
256-
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
257-
unicodeScalars[graphemeClusterStart].value)
258-
if segmenter.isBoundary(gcb1, gcb0) {
259-
break
260-
}
261-
gcb0 = gcb1
262-
graphemeClusterStartUTF16 = graphemeClusterStart._position
263-
}
264-
265-
return endIndexUTF16 - graphemeClusterStartUTF16
183+
return UnicodeScalarView.Index(_position: _utf16Index + _countUTF16)
266184
}
267185

268186
public var customPlaygroundQuickLook: PlaygroundQuickLook {
@@ -276,39 +194,124 @@ extension String.CharacterView : BidirectionalCollection {
276194
///
277195
/// In an empty character view, `startIndex` is equal to `endIndex`.
278196
public var startIndex: Index {
279-
return Index(_base: unicodeScalars.startIndex)
197+
return Index(_base: unicodeScalars.startIndex, in: self)
280198
}
281199

282200
/// A character view's "past the end" position---that is, the position one
283201
/// greater than the last valid subscript argument.
284202
///
285203
/// In an empty character view, `endIndex` is equal to `startIndex`.
286204
public var endIndex: Index {
287-
return Index(_base: unicodeScalars.endIndex)
205+
return Index(_base: unicodeScalars.endIndex, in: self)
288206
}
289207

290208
/// Returns the next consecutive position after `i`.
291209
///
292210
/// - Precondition: The next position is valid.
293211
public func index(after i: Index) -> Index {
294-
_precondition(i._base != i._base._viewEndIndex, "cannot increment endIndex")
295-
return Index(_base: i._endBase)
212+
_precondition(i._base < unicodeScalars.endIndex,
213+
"cannot increment beyond endIndex")
214+
_precondition(i._base >= unicodeScalars.startIndex,
215+
"cannot increment invalid index")
216+
return Index(_base: i._endBase, in: self)
296217
}
297218

298219
/// Returns the previous consecutive position before `i`.
299220
///
300221
/// - Precondition: The previous position is valid.
301222
public func index(before i: Index) -> Index {
302-
// FIXME: swift-3-indexing-model: range check i?
303-
_precondition(i._base != i._base._viewStartIndex,
304-
"cannot decrement startIndex")
223+
_precondition(i._base > unicodeScalars.startIndex,
224+
"cannot decrement before startIndex")
225+
_precondition(i._base <= unicodeScalars.endIndex,
226+
"cannot decrement invalid index")
305227
let predecessorLengthUTF16 =
306-
Index._measureExtendedGraphemeClusterBackward(from: i._base)
228+
_measureExtendedGraphemeClusterBackward(from: i._base)
307229
return Index(
308230
_base: UnicodeScalarView.Index(
309-
i._utf16Index - predecessorLengthUTF16, i._base._core))
231+
_position: i._utf16Index - predecessorLengthUTF16
232+
),
233+
in: self
234+
)
310235
}
311236

237+
/// Returns the length of the first extended grapheme cluster in UTF-16
238+
/// code units.
239+
@inline(never)
240+
internal func _measureExtendedGraphemeClusterForward(
241+
from start: UnicodeScalarView.Index
242+
) -> Int {
243+
var start = start
244+
let end = UnicodeScalarView.Index(_position: _core.count)
245+
if start == end {
246+
return 0
247+
}
248+
249+
let startIndexUTF16 = start._position
250+
let graphemeClusterBreakProperty =
251+
_UnicodeGraphemeClusterBreakPropertyTrie()
252+
let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
253+
254+
var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
255+
unicodeScalars[start].value)
256+
unicodeScalars.formIndex(after: &start)
257+
258+
while start != end {
259+
// FIXME(performance): consider removing this "fast path". A branch
260+
// that is hard to predict could be worse for performance than a few
261+
// loads from cache to fetch the property 'gcb1'.
262+
if segmenter.isBoundaryAfter(gcb0) {
263+
break
264+
}
265+
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
266+
unicodeScalars[start].value)
267+
if segmenter.isBoundary(gcb0, gcb1) {
268+
break
269+
}
270+
gcb0 = gcb1
271+
unicodeScalars.formIndex(after: &start)
272+
}
273+
274+
return start._position - startIndexUTF16
275+
}
276+
277+
/// Returns the length of the previous extended grapheme cluster in UTF-16
278+
/// code units.
279+
@inline(never)
280+
internal func _measureExtendedGraphemeClusterBackward(
281+
from end: UnicodeScalarView.Index
282+
) -> Int {
283+
let start = UnicodeScalarView.Index(_position: 0)
284+
if start == end {
285+
return 0
286+
}
287+
288+
let endIndexUTF16 = end._position
289+
let graphemeClusterBreakProperty =
290+
_UnicodeGraphemeClusterBreakPropertyTrie()
291+
let segmenter = _UnicodeExtendedGraphemeClusterSegmenter()
292+
293+
var graphemeClusterStart = end
294+
295+
unicodeScalars.formIndex(before: &graphemeClusterStart)
296+
var gcb0 = graphemeClusterBreakProperty.getPropertyRawValue(
297+
unicodeScalars[graphemeClusterStart].value)
298+
299+
var graphemeClusterStartUTF16 = graphemeClusterStart._position
300+
301+
while graphemeClusterStart != start {
302+
unicodeScalars.formIndex(before: &graphemeClusterStart)
303+
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
304+
unicodeScalars[graphemeClusterStart].value)
305+
if segmenter.isBoundary(gcb1, gcb0) {
306+
break
307+
}
308+
gcb0 = gcb1
309+
graphemeClusterStartUTF16 = graphemeClusterStart._position
310+
}
311+
312+
return endIndexUTF16 - graphemeClusterStartUTF16
313+
}
314+
312315
/// Accesses the character at the given position.
313316
///
314317
/// The following example searches a string's character view for a capital

stdlib/public/core/StringIndexConversions.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@ extension String.Index {
4848
_ unicodeScalarIndex: String.UnicodeScalarIndex,
4949
within other: String
5050
) {
51-
if !unicodeScalarIndex._isOnGraphemeClusterBoundary {
51+
if !other.unicodeScalars._isOnGraphemeClusterBoundary(unicodeScalarIndex) {
5252
return nil
5353
}
54-
self.init(_base: unicodeScalarIndex)
54+
self.init(_base: unicodeScalarIndex, in: other.characters)
5555
}
5656

5757
/// Creates an index in the given string that corresponds exactly to the

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -115,39 +115,28 @@ extension String {
115115
/// print(hearts.unicodeScalars[j].value)
116116
/// // Prints "9829"
117117
public struct Index : Comparable {
118-
public init(_ _position: Int, _ _core: _StringCore) {
118+
public // SPI(Foundation)
119+
init(_position: Int) {
119120
self._position = _position
120-
self._core = _core
121-
}
122-
123-
/// The start index for this view.
124-
internal var _viewStartIndex: Index {
125-
return Index(_core.startIndex, _core)
126-
}
127-
128-
/// The end index for this view.
129-
internal var _viewEndIndex: Index {
130-
return Index(_core.endIndex, _core)
131121
}
132122

133123
@_versioned internal var _position: Int
134-
@_versioned internal var _core: _StringCore
135124
}
136125

137126
/// The position of the first Unicode scalar value if the string is
138127
/// nonempty.
139128
///
140129
/// If the string is empty, `startIndex` is equal to `endIndex`.
141130
public var startIndex: Index {
142-
return Index(_core.startIndex, _core)
131+
return Index(_position: _core.startIndex)
143132
}
144133

145134
/// The "past the end" position---that is, the position one greater than
146135
/// the last valid subscript argument.
147136
///
148137
/// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`.
149138
public var endIndex: Index {
150-
return Index(_core.endIndex, _core)
139+
return Index(_position: _core.endIndex)
151140
}
152141

153142
/// Returns the next consecutive location after `i`.
@@ -157,21 +146,21 @@ extension String {
157146
var scratch = _ScratchIterator(_core, i._position)
158147
var decoder = UTF16()
159148
let (_, length) = decoder._decodeOne(&scratch)
160-
return Index(i._position + length, _core)
149+
return Index(_position: i._position + length)
161150
}
162151

163152
/// Returns the previous consecutive location before `i`.
164153
///
165154
/// - Precondition: The previous location exists.
166155
public func index(before i: Index) -> Index {
167-
var i = i._position-1
156+
var i = i._position - 1
168157
let codeUnit = _core[i]
169158
if _slowPath((codeUnit >> 10) == 0b1101_11) {
170159
if i != 0 && (_core[i - 1] >> 10) == 0b1101_10 {
171160
i -= 1
172161
}
173162
}
174-
return Index(i, _core)
163+
return Index(_position: i)
175164
}
176165

177166
/// Accesses the Unicode scalar value at the given position.
@@ -455,7 +444,7 @@ extension String.UnicodeScalarIndex {
455444
return nil
456445
}
457446
}
458-
self.init(utf16Index._offset, unicodeScalars._core)
447+
self.init(_position: utf16Index._offset)
459448
}
460449

461450
/// Creates an index in the given Unicode scalars view that corresponds
@@ -485,7 +474,7 @@ extension String.UnicodeScalarIndex {
485474
if !utf8Index._isOnUnicodeScalarBoundary {
486475
return nil
487476
}
488-
self.init(utf8Index._coreIndex, core)
477+
self.init(_position: utf8Index._coreIndex)
489478
}
490479

491480
/// Creates an index in the given Unicode scalars view that corresponds
@@ -510,7 +499,7 @@ extension String.UnicodeScalarIndex {
510499
_ characterIndex: String.Index,
511500
within unicodeScalars: String.UnicodeScalarView
512501
) {
513-
self.init(characterIndex._base._position, unicodeScalars._core)
502+
self.init(_position: characterIndex._base._position)
514503
}
515504

516505
/// Returns the position in the given UTF-8 view that corresponds exactly to
@@ -581,13 +570,14 @@ extension String.UnicodeScalarIndex {
581570
public func samePosition(in characters: String) -> String.Index? {
582571
return String.Index(self, within: characters)
583572
}
573+
}
584574

585-
internal var _isOnGraphemeClusterBoundary: Bool {
586-
let scalars = String.UnicodeScalarView(_core)
587-
if self == scalars.startIndex || self == scalars.endIndex {
575+
extension String.UnicodeScalarView {
576+
internal func _isOnGraphemeClusterBoundary(_ i: Index) -> Bool {
577+
if i == startIndex || i == endIndex {
588578
return true
589579
}
590-
let precedingScalar = scalars[scalars.index(before: self)]
580+
let precedingScalar = self[index(before: i)]
591581

592582
let graphemeClusterBreakProperty =
593583
_UnicodeGraphemeClusterBreakPropertyTrie()
@@ -600,8 +590,7 @@ extension String.UnicodeScalarIndex {
600590
return true
601591
}
602592

603-
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(
604-
scalars[self].value)
593+
let gcb1 = graphemeClusterBreakProperty.getPropertyRawValue(self[i].value)
605594

606595
return segmenter.isBoundary(gcb0, gcb1)
607596
}

0 commit comments

Comments
 (0)