Skip to content

Commit d588112

Browse files
committed
[stdlib] String.UnicodeScalarView: Review index validation
1 parent 67f01a1 commit d588112

File tree

3 files changed

+130
-62
lines changed

3 files changed

+130
-62
lines changed

stdlib/public/core/StringGuts.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,12 +367,14 @@ extension _StringGuts {
367367
/// not set the flags that this method relies on. However, false positives
368368
/// cannot happen: if this method detects a mismatch, then it is guaranteed to
369369
/// be a real one.
370+
@_alwaysEmitIntoClient
370371
@inline(__always)
371372
internal func ensureMatchingEncoding(_ i: String.Index) -> String.Index {
372373
if _fastPath(!isForeign && i._canBeUTF8) { return i }
373374
return _slowEnsureMatchingEncoding(i)
374375
}
375376

377+
@_alwaysEmitIntoClient
376378
@inline(never)
377379
internal func _slowEnsureMatchingEncoding(_ i: String.Index) -> String.Index {
378380
_internalInvariant(isForeign || !i._canBeUTF8)
@@ -428,6 +430,7 @@ extension _StringGuts {
428430
/// - has an encoding that matches this string,
429431
/// - is within the bounds of this string, and
430432
/// - is aligned on a scalar boundary.
433+
@_alwaysEmitIntoClient
431434
internal func validateScalarIndex(_ i: String.Index) -> String.Index {
432435
let i = ensureMatchingEncoding(i)
433436
_precondition(i._encodedOffset < count, "String index is out of bounds")
@@ -441,6 +444,7 @@ extension _StringGuts {
441444
/// - has an encoding that matches this string,
442445
/// - is within `start ..< end`, and
443446
/// - is aligned on a scalar boundary.
447+
@_alwaysEmitIntoClient
444448
internal func validateScalarIndex(
445449
_ i: String.Index,
446450
from start: String.Index,
@@ -460,6 +464,7 @@ extension _StringGuts {
460464
/// - has an encoding that matches this string,
461465
/// - is within the bounds of this string (including the `endIndex`), and
462466
/// - is aligned on a scalar boundary.
467+
@_alwaysEmitIntoClient
463468
internal func validateInclusiveScalarIndex(
464469
_ i: String.Index
465470
) -> String.Index {

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 92 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -106,42 +106,38 @@ extension String.UnicodeScalarView: BidirectionalCollection {
106106
/// - Precondition: The next location exists.
107107
@inlinable @inline(__always)
108108
public func index(after i: Index) -> Index {
109-
// TODO(String performance): isASCII fast-path
110-
111-
// TODO(lorentey): Review index validation
112-
_precondition(i < endIndex, "String index is out of bounds")
113-
let i = _guts.scalarAlign(i)
109+
let i = _guts.validateScalarIndex(i)
110+
return _uncheckedIndex(after: i)
111+
}
114112

113+
@_alwaysEmitIntoClient
114+
@inline(__always)
115+
internal func _uncheckedIndex(after i: Index) -> Index {
116+
// TODO(String performance): isASCII fast-path
115117
if _fastPath(_guts.isFastUTF8) {
116118
let len = _guts.fastUTF8ScalarLength(startingAt: i._encodedOffset)
117119
return i.encoded(offsetBy: len)._scalarAligned._knownUTF8
118120
}
119-
120121
return _foreignIndex(after: i)
121122
}
122123

123-
@_alwaysEmitIntoClient // Swift 5.1 bug fix
124-
public func distance(from start: Index, to end: Index) -> Int {
125-
// TODO(lorentey): Review index validation
126-
return _distance(from: _guts.scalarAlign(start), to: _guts.scalarAlign(end))
127-
}
128-
129124
/// Returns the previous consecutive location before `i`.
130125
///
131126
/// - Precondition: The previous location exists.
132127
@inlinable @inline(__always)
133128
public func index(before i: Index) -> Index {
134-
// TODO(lorentey): Review index validation
135-
// TODO(String performance): isASCII fast-path
136-
137-
// Note: bounds checking in `index(before:)` is tricky as scalar aligning an
138-
// index may need to access storage, but it may also move it closer towards
139-
// the `startIndex`. Therefore, we must check against the `endIndex` before
140-
// aligning, but we need to delay the `i > startIndex` check until after.
141-
_precondition(i <= endIndex, "String index is out of bounds")
142-
let i = _guts.scalarAlign(i)
129+
let i = _guts.validateInclusiveScalarIndex(i)
130+
// Note: Aligning an index may move it closer towards the `startIndex`, so
131+
// the `i > startIndex` check needs to come after rounding.
143132
_precondition(i > startIndex, "String index is out of bounds")
144133

134+
return _uncheckedIndex(before: i)
135+
}
136+
137+
@_alwaysEmitIntoClient
138+
@inline(__always)
139+
internal func _uncheckedIndex(before i: Index) -> Index {
140+
// TODO(String performance): isASCII fast-path
145141
if _fastPath(_guts.isFastUTF8) {
146142
let len = _guts.withFastUTF8 { utf8 in
147143
_utf8ScalarLength(utf8, endingAt: i._encodedOffset)
@@ -171,11 +167,80 @@ extension String.UnicodeScalarView: BidirectionalCollection {
171167
/// must be less than the view's end index.
172168
@inlinable @inline(__always)
173169
public subscript(position: Index) -> Unicode.Scalar {
174-
// TODO(lorentey): Review index validation
175-
String(_guts)._boundsCheck(position)
176-
let i = _guts.scalarAlign(position)
170+
let i = _guts.validateScalarIndex(position)
177171
return _guts.errorCorrectedScalar(startingAt: i._encodedOffset).0
178172
}
173+
174+
@_alwaysEmitIntoClient // Swift 5.1 bug fix
175+
public func distance(from start: Index, to end: Index) -> Int {
176+
let start = _guts.validateInclusiveScalarIndex(start)
177+
let end = _guts.validateInclusiveScalarIndex(end)
178+
179+
var i = start
180+
var count = 0
181+
if i < end {
182+
while i < end {
183+
count += 1
184+
i = _uncheckedIndex(after: i)
185+
}
186+
}
187+
else if i > end {
188+
while i > end {
189+
count -= 1
190+
i = _uncheckedIndex(before: i)
191+
}
192+
}
193+
return count
194+
}
195+
196+
@_alwaysEmitIntoClient
197+
public func index(_ i: Index, offsetBy distance: Int) -> Index {
198+
var i = _guts.validateInclusiveScalarIndex(i)
199+
200+
if distance >= 0 {
201+
for _ in stride(from: 0, to: distance, by: 1) {
202+
_precondition(i._encodedOffset < _guts.count, "String index is out of bounds")
203+
i = _uncheckedIndex(after: i)
204+
}
205+
} else {
206+
for _ in stride(from: 0, to: distance, by: -1) {
207+
_precondition(i._encodedOffset > 0, "String index is out of bounds")
208+
i = _uncheckedIndex(before: i)
209+
}
210+
}
211+
return _guts.markEncoding(i)
212+
}
213+
214+
@_alwaysEmitIntoClient
215+
public func index(
216+
_ i: Index, offsetBy distance: Int, limitedBy limit: Index
217+
) -> Index? {
218+
// Note: `limit` is intentionally not scalar aligned to ensure our behavior
219+
// exactly matches the documentation above. We do need to ensure it has a
220+
// matching encoding, though. The same goes for `start`, which is used to
221+
// determine whether the limit applies at all.
222+
let limit = _guts.ensureMatchingEncoding(limit)
223+
let start = _guts.ensureMatchingEncoding(i)
224+
225+
var i = _guts.validateInclusiveScalarIndex(i)
226+
227+
if distance >= 0 {
228+
for _ in stride(from: 0, to: distance, by: 1) {
229+
guard limit < start || i < limit else { return nil }
230+
_precondition(i._encodedOffset < _guts.count, "String index is out of bounds")
231+
i = _uncheckedIndex(after: i)
232+
}
233+
guard limit < start || i <= limit else { return nil }
234+
} else {
235+
for _ in stride(from: 0, to: distance, by: -1) {
236+
guard limit > start || i > limit else { return nil }
237+
_precondition(i._encodedOffset > 0, "String index is out of bounds")
238+
i = _uncheckedIndex(before: i)
239+
}
240+
guard limit > start || i >= limit else { return nil }
241+
}
242+
return _guts.markEncoding(i)
243+
}
179244
}
180245

181246
extension String.UnicodeScalarView {
@@ -318,9 +383,8 @@ extension String.UnicodeScalarView: RangeReplaceableCollection {
318383
_ bounds: Range<Index>,
319384
with newElements: C
320385
) where C: Collection, C.Element == Unicode.Scalar {
321-
// TODO(lorentey): Review index validation
322386
// TODO(String performance): Skip extra String and Array allocation
323-
387+
let bounds = _guts.validateScalarRange(bounds)
324388
let utf8Replacement = newElements.flatMap { String($0).utf8 }
325389
let replacement = utf8Replacement.withUnsafeBufferPointer {
326390
return String._uncheckedFromUTF8($0)
@@ -423,9 +487,8 @@ extension String.UnicodeScalarView {
423487

424488
@available(swift, introduced: 4)
425489
public subscript(r: Range<Index>) -> String.UnicodeScalarView.SubSequence {
426-
// TODO(lorentey): Review index validation
427-
_failEarlyRangeCheck(r, bounds: startIndex..<endIndex)
428-
return String.UnicodeScalarView.SubSequence(self, _bounds: r)
490+
let r = _guts.validateScalarRange(r)
491+
return SubSequence(_unchecked: self, bounds: r)
429492
}
430493
}
431494

stdlib/public/core/Substring.swift

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1018,14 +1018,24 @@ extension Substring {
10181018
internal var _slice: Slice<String.UnicodeScalarView>
10191019

10201020
/// Creates an instance that slices `base` at `bounds`, using the bounds as given without validating or aligning them.
1021-
@inlinable
1021+
internal init(
1022+
_unchecked base: String.UnicodeScalarView, bounds: Range<Index>
1023+
) {
1024+
_slice = Slice(base: base, bounds: bounds)
1025+
}
1026+
1027+
/// Creates an instance that slices `base` at `_bounds`, after scalar-aligning both bounds.
1028+
@usableFromInline // This used to be inlinable before 5.7
1029+
@available(*, deprecated) // Use `init(_unchecked:)` in new code.
10221030
internal init(_ base: String.UnicodeScalarView, _bounds: Range<Index>) {
10231031
let start = base._guts.scalarAlign(_bounds.lowerBound)
10241032
let end = base._guts.scalarAlign(_bounds.upperBound)
1025-
_slice = Slice(
1026-
base: String(base._guts).unicodeScalars,
1027-
bounds: Range(_uncheckedBounds: (start, end)))
1033+
_slice = Slice(base: base, bounds: Range(_uncheckedBounds: (start, end)))
10281034
}
1035+
1036+
@_alwaysEmitIntoClient
1037+
@inline(__always)
1038+
internal var _wholeGuts: _StringGuts { _slice._base._guts }
10291039
}
10301040
}
10311041

@@ -1038,87 +1048,77 @@ extension Substring.UnicodeScalarView: BidirectionalCollection {
10381048
//
10391049
// Plumb slice operations through
10401050
//
1041-
@inlinable
1042-
public var startIndex: Index { return _slice.startIndex }
1051+
@inlinable @inline(__always)
1052+
public var startIndex: Index { _slice._startIndex }
10431053

1044-
@inlinable
1045-
public var endIndex: Index { return _slice.endIndex }
1054+
@inlinable @inline(__always)
1055+
public var endIndex: Index { _slice._endIndex }
10461056

10471057
@inlinable
10481058
public subscript(index: Index) -> Element {
1049-
// TODO(lorentey): Review index validation
1050-
return _slice[index]
1059+
let index = _wholeGuts.validateScalarIndex(
1060+
index, from: startIndex, to: endIndex)
1061+
return _wholeGuts.errorCorrectedScalar(startingAt: index._encodedOffset).0
10511062
}
10521063

10531064
@inlinable
10541065
public var indices: Indices {
1055-
// TODO(lorentey): Review index validation
10561066
return _slice.indices
10571067
}
10581068

10591069
@inlinable
10601070
public func index(after i: Index) -> Index {
1061-
// TODO(lorentey): Review index validation
1062-
return _slice.index(after: i)
1071+
_slice._base.index(after: i)
10631072
}
10641073

10651074
@inlinable
10661075
public func formIndex(after i: inout Index) {
1067-
// TODO(lorentey): Review index validation
1068-
_slice.formIndex(after: &i)
1076+
_slice._base.formIndex(after: &i)
10691077
}
10701078

10711079
@inlinable
10721080
public func index(_ i: Index, offsetBy n: Int) -> Index {
1073-
// TODO(lorentey): Review index validation
1074-
return _slice.index(i, offsetBy: n)
1081+
_slice._base.index(i, offsetBy: n)
10751082
}
10761083

10771084
@inlinable
10781085
public func index(
10791086
_ i: Index, offsetBy n: Int, limitedBy limit: Index
10801087
) -> Index? {
1081-
// TODO(lorentey): Review index validation
1082-
return _slice.index(i, offsetBy: n, limitedBy: limit)
1088+
_slice._base.index(i, offsetBy: n, limitedBy: limit)
10831089
}
10841090

10851091
@inlinable
10861092
public func distance(from start: Index, to end: Index) -> Int {
1087-
// TODO(lorentey): Review index validation
1088-
return _slice.distance(from: start, to: end)
1093+
_slice._base.distance(from: start, to: end)
10891094
}
10901095

10911096
@inlinable
10921097
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
1093-
// TODO(lorentey): Review index validation
1094-
_slice._failEarlyRangeCheck(index, bounds: bounds)
1098+
_slice._base._failEarlyRangeCheck(index, bounds: bounds)
10951099
}
10961100

10971101
@inlinable
10981102
public func _failEarlyRangeCheck(
10991103
_ range: Range<Index>, bounds: Range<Index>
11001104
) {
1101-
// TODO(lorentey): Review index validation
1102-
_slice._failEarlyRangeCheck(range, bounds: bounds)
1105+
_slice._base._failEarlyRangeCheck(range, bounds: bounds)
11031106
}
11041107

11051108
@inlinable
11061109
public func index(before i: Index) -> Index {
1107-
// TODO(lorentey): Review index validation
1108-
return _slice.index(before: i)
1110+
_slice._base.index(before: i)
11091111
}
11101112

11111113
@inlinable
11121114
public func formIndex(before i: inout Index) {
1113-
// TODO(lorentey): Review index validation
1114-
_slice.formIndex(before: &i)
1115+
_slice._base.formIndex(before: &i)
11151116
}
11161117

1117-
@inlinable
11181118
public subscript(r: Range<Index>) -> Substring.UnicodeScalarView {
1119-
// TODO(lorentey): Review index validation
1120-
_failEarlyRangeCheck(r, bounds: startIndex..<endIndex)
1121-
return Substring.UnicodeScalarView(_slice._base, _bounds: r)
1119+
// Note: This was inlinable before Swift 5.7.
1120+
let r = _wholeGuts.validateScalarRange(r, from: startIndex, to: endIndex)
1121+
return Substring.UnicodeScalarView(_unchecked: _slice._base, bounds: r)
11221122
}
11231123
}
11241124

0 commit comments

Comments
 (0)