Skip to content

Commit 5f6c300

Browse files
committed
[stdlib] String.UTF8View: Review/fix index validation
Also, in UTF-8 slices, forward collection methods to the base view instead of `Slice`, to make behavior a bit easier to understand. (There is no need to force readers to page in `Slice` implementations _in addition to_ whatever the base view is doing.)
1 parent 8610bdf commit 5f6c300

File tree

3 files changed, with 110 additions and 31 deletions.

stdlib/public/core/StringGuts.swift

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,9 @@ extension _StringGuts {
456456
_precondition(i >= start && i < end, "Substring index is out of bounds")
457457
return scalarAlign(i)
458458
}
459+
}
459460

461+
extension _StringGuts {
460462
/// Validate `i` and adjust its position toward the start, returning the
461463
/// resulting index or trapping as appropriate. If this function returns, then
462464
/// the returned value
@@ -491,7 +493,47 @@ extension _StringGuts {
491493
_precondition(i >= start && i <= end, "Substring index is out of bounds")
492494
return scalarAlign(i)
493495
}
496+
}
497+
498+
extension _StringGuts {
499+
/// Validates `range` against these guts without aligning its bounds: no
/// `scalarAlign` call is made in this function, so the bounds are returned
/// at whatever positions `ensureMatchingEncoding` produces for them.
///
/// Traps with "String index range is out of bounds" unless the converted
/// upper bound's encoded offset is at most `count` and the converted bounds
/// are still ordered (`lower <= upper`).
///
/// - Parameter range: The index range to validate.
/// - Returns: A range formed from the two converted bounds.
@_alwaysEmitIntoClient
500+
internal func validateSubscalarRange(
501+
_ range: Range<String.Index>
502+
) -> Range<String.Index> {
503+
// Each bound is converted independently of the other.
// NOTE(review): presumably `ensureMatchingEncoding` re-expresses an index
// whose encoding differs from the string's storage encoding -- confirm
// against its definition.
let upper = ensureMatchingEncoding(range.upperBound)
504+
let lower = ensureMatchingEncoding(range.lowerBound)
505+
506+
// Note: if only `lower` was miscoded, then the range invariant `lower <=
507+
// upper` may no longer hold after the above conversions, so we need to
508+
// re-check it here.
509+
_precondition(upper._encodedOffset <= count && lower <= upper,
510+
"String index range is out of bounds")
511+
512+
// The ordering of the bounds was just verified by the precondition above,
// so the unchecked initializer is used here.
return Range(_uncheckedBounds: (lower, upper))
513+
}
514+
515+
/// Validates `range` against the caller-supplied bounds `start` and `end`
/// without aligning its bounds: no `scalarAlign` call is made in this
/// function.
///
/// Traps with "Substring index range is out of bounds" unless the converted
/// bounds satisfy `start <= lower <= upper <= end`.
///
/// - Parameters:
///   - range: The index range to validate.
///   - start: The lowest position the returned range may start at.
///   - end: The highest position the returned range may end at.
/// - Returns: A range formed from the two converted bounds.
@_alwaysEmitIntoClient
516+
internal func validateSubscalarRange(
517+
_ range: Range<String.Index>,
518+
from start: String.Index,
519+
to end: String.Index
520+
) -> Range<String.Index> {
521+
// Callers are expected to pass consistent bounds that lie within the string.
// NOTE(review): `start` and `end` are compared as given; only the bounds of
// `range` are passed through `ensureMatchingEncoding` below.
_internalInvariant(start <= end && end <= endIndex)
522+
523+
let upper = ensureMatchingEncoding(range.upperBound)
524+
let lower = ensureMatchingEncoding(range.lowerBound)
525+
526+
// Note: if only `lower` was miscoded, then the range invariant `lower <=
527+
// upper` may no longer hold after the above conversions, so we need to
528+
// re-check it here.
529+
_precondition(upper <= end && lower >= start && lower <= upper,
530+
"Substring index range is out of bounds")
531+
532+
// The ordering of the bounds was just verified by the precondition above,
// so the unchecked initializer is used here.
return Range(_uncheckedBounds: (lower, upper))
533+
}
534+
}
494535

536+
extension _StringGuts {
495537
/// Validate `range` and adjust the position of its bounds, returning the
496538
/// resulting range or trapping as appropriate. If this function returns, then
497539
/// the bounds of the returned value

stdlib/public/core/StringUTF8View.swift

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,28 +136,37 @@ extension String.UTF8View: BidirectionalCollection {
136136
/// - Precondition: The next position is representable.
137137
@inlinable @inline(__always)
138138
public func index(after i: Index) -> Index {
139+
let i = _guts.ensureMatchingEncoding(i)
139140
if _fastPath(_guts.isFastUTF8) {
141+
// Note: deferred bounds check
140142
return i.strippingTranscoding.nextEncoded._knownUTF8
141143
}
142-
144+
_precondition(i._encodedOffset < _guts.count,
145+
"String index is out of bounds")
143146
return _foreignIndex(after: i)
144147
}
145148

146149
@inlinable @inline(__always)
147150
public func index(before i: Index) -> Index {
148-
_precondition(!i.isZeroPosition)
151+
let i = _guts.ensureMatchingEncoding(i)
152+
_precondition(!i.isZeroPosition, "String index is out of bounds")
149153
if _fastPath(_guts.isFastUTF8) {
150154
return i.strippingTranscoding.priorEncoded._knownUTF8
151155
}
152156

157+
_precondition(i._encodedOffset <= _guts.count,
158+
"String index is out of bounds")
153159
return _foreignIndex(before: i)
154160
}
155161

156162
@inlinable @inline(__always)
157163
public func index(_ i: Index, offsetBy n: Int) -> Index {
164+
let i = _guts.ensureMatchingEncoding(i)
158165
if _fastPath(_guts.isFastUTF8) {
159-
_precondition(n + i._encodedOffset <= _guts.count)
160-
return i.strippingTranscoding.encoded(offsetBy: n)
166+
let offset = n + i._encodedOffset
167+
_precondition(offset >= 0 && offset <= _guts.count,
168+
"String index is out of bounds")
169+
return Index(_encodedOffset: offset)._knownUTF8
161170
}
162171

163172
return _foreignIndex(i, offsetBy: n)
@@ -167,6 +176,7 @@ extension String.UTF8View: BidirectionalCollection {
167176
public func index(
168177
_ i: Index, offsetBy n: Int, limitedBy limit: Index
169178
) -> Index? {
179+
let i = _guts.ensureMatchingEncoding(i)
170180
if _fastPath(_guts.isFastUTF8) {
171181
// Check the limit: ignore limit if it precedes `i` (in the correct
172182
// direction), otherwise must not be beyond limit (in the correct
@@ -179,6 +189,8 @@ extension String.UTF8View: BidirectionalCollection {
179189
} else {
180190
guard limitOffset > iOffset || result >= limitOffset else { return nil }
181191
}
192+
_precondition(result >= 0 && result <= _guts.count,
193+
"String index is out of bounds")
182194
return Index(_encodedOffset: result)
183195
}
184196

@@ -187,9 +199,14 @@ extension String.UTF8View: BidirectionalCollection {
187199

188200
/// Returns the distance from `i` to `j`.
///
/// Both indices are first passed through `_guts.ensureMatchingEncoding`.
/// When `_guts.isFastUTF8` holds, the result is the wrapping difference of
/// the two encoded offsets and no bounds check is performed in this
/// function. Otherwise both encoded offsets must be at most `_guts.count`
/// (trapping with "String index is out of bounds" if not) before the call
/// is handed to `_foreignDistance(from:to:)`.
@inlinable @inline(__always)
189201
public func distance(from i: Index, to j: Index) -> Int {
202+
let i = _guts.ensureMatchingEncoding(i)
203+
let j = _guts.ensureMatchingEncoding(j)
190204
if _fastPath(_guts.isFastUTF8) {
191205
// `&-` is the wrapping subtraction operator, so this line cannot trap.
return j._encodedOffset &- i._encodedOffset
192206
}
207+
// Only the non-fast path validates that both offsets are within the string.
_precondition(
208+
i._encodedOffset <= _guts.count && j._encodedOffset <= _guts.count,
209+
"String index is out of bounds")
193210
return _foreignDistance(from: i, to: j)
194211
}
195212

@@ -207,7 +224,14 @@ extension String.UTF8View: BidirectionalCollection {
207224
/// must be less than the view's end index.
208225
@inlinable @inline(__always)
209226
public subscript(i: Index) -> UTF8.CodeUnit {
210-
String(_guts)._boundsCheck(i)
227+
let i = _guts.ensureMatchingEncoding(i)
228+
_precondition(i._encodedOffset < _guts.count,
229+
"String index is out of bounds")
230+
return self[_unchecked: i]
231+
}
232+
233+
@_alwaysEmitIntoClient @inline(__always)
234+
internal subscript(_unchecked i: Index) -> UTF8.CodeUnit {
211235
if _fastPath(_guts.isFastUTF8) {
212236
return _guts.withFastUTF8 { utf8 in utf8[_unchecked: i._encodedOffset] }
213237
}
@@ -373,6 +397,7 @@ extension String.UTF8View {
373397
/// Returns the `Substring.UTF8View` over the given range of this view.
///
/// The range is passed through `_guts.validateSubscalarRange` first, and the
/// validated range (not the caller's original value) is what the resulting
/// view is constructed with.
@inlinable
374398
@available(swift, introduced: 4)
375399
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
400+
// Shadows the parameter so only the validated range is used below.
let r = _guts.validateSubscalarRange(r)
376401
return Substring.UTF8View(self, _bounds: r)
377402
}
378403
}
@@ -422,6 +447,7 @@ extension String.UTF8View {
422447
@_effects(releasenone)
423448
internal func _foreignIndex(after idx: Index) -> Index {
424449
_internalInvariant(_guts.isForeign)
450+
_internalInvariant(idx._encodedOffset < _guts.count)
425451

426452
let idx = _utf8AlignForeignIndex(idx)
427453

@@ -448,6 +474,7 @@ extension String.UTF8View {
448474
@_effects(releasenone)
449475
internal func _foreignIndex(before idx: Index) -> Index {
450476
_internalInvariant(_guts.isForeign)
477+
_internalInvariant(idx._encodedOffset <= _guts.count)
451478

452479
let idx = _utf8AlignForeignIndex(idx)
453480

stdlib/public/core/Substring.swift

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,12 @@ extension Substring {
731731
base: String(base._guts).utf8,
732732
bounds: _bounds)
733733
}
734+
735+
/// The `_StringGuts` of the slice's base view, read as `_slice._base._guts`.
@_alwaysEmitIntoClient @inline(__always)
736+
internal var _wholeGuts: _StringGuts { _slice._base._guts }
737+
738+
/// The `String.UTF8View` that `_slice` is a slice of, read as `_slice._base`.
@_alwaysEmitIntoClient @inline(__always)
739+
internal var _base: String.UTF8View { _slice._base }
734740
}
735741
}
736742

@@ -740,48 +746,52 @@ extension Substring.UTF8View: BidirectionalCollection {
740746
public typealias Element = String.UTF8View.Element
741747
public typealias SubSequence = Substring.UTF8View
742748

743-
//
744-
// Plumb slice operations through
745-
//
746749
@inlinable
747-
public var startIndex: Index { return _slice.startIndex }
750+
public var startIndex: Index { _slice._startIndex }
748751

749752
@inlinable
750-
public var endIndex: Index { return _slice.endIndex }
753+
public var endIndex: Index { _slice._endIndex }
751754

752755
@inlinable
753-
public subscript(index: Index) -> Element { return _slice[index] }
756+
public subscript(index: Index) -> Element {
757+
let index = _wholeGuts.ensureMatchingEncoding(index)
758+
_precondition(index >= startIndex && index < endIndex,
759+
"String index is out of bounds")
760+
return _base[_unchecked: index]
761+
}
754762

755763
/// The indices of this view, forwarded unchanged from `_slice.indices`.
@inlinable
756764
public var indices: Indices { return _slice.indices }
757765

758766
@inlinable
759-
public func index(after i: Index) -> Index { return _slice.index(after: i) }
767+
public func index(after i: Index) -> Index {
768+
// Note: deferred bounds check
769+
return _base.index(after: i)
770+
}
760771

761772
@inlinable
762773
public func formIndex(after i: inout Index) {
763-
// TODO(lorentey): Review index validation
764-
_slice.formIndex(after: &i)
774+
// Note: deferred bounds check
775+
_base.formIndex(after: &i)
765776
}
766777

767778
@inlinable
768779
public func index(_ i: Index, offsetBy n: Int) -> Index {
769-
// TODO(lorentey): Review index validation
770-
return _slice.index(i, offsetBy: n)
780+
// Note: deferred bounds check
781+
return _base.index(i, offsetBy: n)
771782
}
772783

773784
@inlinable
774785
public func index(
775786
_ i: Index, offsetBy n: Int, limitedBy limit: Index
776787
) -> Index? {
777-
// TODO(lorentey): Review index validation
778-
return _slice.index(i, offsetBy: n, limitedBy: limit)
788+
// Note: deferred bounds check
789+
return _base.index(i, offsetBy: n, limitedBy: limit)
779790
}
780791

781792
@inlinable
782793
public func distance(from start: Index, to end: Index) -> Int {
783-
// TODO(lorentey): Review index validation
784-
return _slice.distance(from: start, to: end)
794+
return _base.distance(from: start, to: end)
785795
}
786796

787797
@_alwaysEmitIntoClient
@@ -794,36 +804,36 @@ extension Substring.UTF8View: BidirectionalCollection {
794804

795805
@inlinable
796806
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
797-
// TODO(lorentey): Review index validation
798-
_slice._failEarlyRangeCheck(index, bounds: bounds)
807+
// FIXME: This probably ought to ensure that all three indices have matching
808+
// encodings.
809+
_base._failEarlyRangeCheck(index, bounds: bounds)
799810
}
800811

801812
@inlinable
802813
public func _failEarlyRangeCheck(
803814
_ range: Range<Index>, bounds: Range<Index>
804815
) {
805-
// TODO(lorentey): Review index validation
806-
_slice._failEarlyRangeCheck(range, bounds: bounds)
816+
// FIXME: This probably ought to ensure that all three indices have matching
817+
// encodings.
818+
_base._failEarlyRangeCheck(range, bounds: bounds)
807819
}
808820

809821
@inlinable
810822
public func index(before i: Index) -> Index {
811-
// TODO(lorentey): Review index validation
812-
return _slice.index(before: i)
823+
// Note: deferred bounds check
824+
return _base.index(before: i)
813825
}
814826

815827
@inlinable
816828
public func formIndex(before i: inout Index) {
817-
// TODO(lorentey): Review index validation
818-
_slice.formIndex(before: &i)
829+
// Note: deferred bounds check
830+
_base.formIndex(before: &i)
819831
}
820832

821833
@inlinable
822834
public subscript(r: Range<Index>) -> Substring.UTF8View {
823-
// TODO(lorentey): Review index validation
824835
// FIXME(strings): tests.
825-
_precondition(r.lowerBound >= startIndex && r.upperBound <= endIndex,
826-
"UTF8View index range out of bounds")
836+
let r = _wholeGuts.validateSubscalarRange(r, from: startIndex, to: endIndex)
827837
return Substring.UTF8View(_slice.base, _bounds: r)
828838
}
829839
}

0 commit comments

Comments
 (0)