Skip to content

Commit 4ad8b26

Browse files
committed
[stdlib] String.UTF16View: Review/fix index validation
Also, in UTF-16 slices, forward collection methods to the base view instead of `Slice`, to make behavior a bit easier to understand. (There is no need to force readers to page in `Slice` implementations _in addition to_ whatever the base view is doing.)
1 parent 5f6c300 commit 4ad8b26

File tree

2 files changed

+86 -42 lines changed

stdlib/public/core/StringUTF16View.swift

Lines changed: 53 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -137,18 +137,23 @@ extension String.UTF16View: BidirectionalCollection {
137137
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
138138
@inlinable @inline(__always)
139139
public var endIndex: Index { return _guts.endIndex }
140-
140+
141141
@inlinable @inline(__always)
142142
public func index(after idx: Index) -> Index {
143+
var idx = _guts.ensureMatchingEncoding(idx)
144+
_precondition(idx._encodedOffset < _guts.count,
145+
"String index is out of bounds")
143146
if _slowPath(_guts.isForeign) { return _foreignIndex(after: idx) }
144-
if _guts.isASCII { return idx.nextEncoded._knownUTF8._knownUTF16 }
147+
if _guts.isASCII {
148+
return idx.nextEncoded._scalarAligned._knownUTF8._knownUTF16
149+
}
145150

146151
// For a BMP scalar (1-3 UTF-8 code units), advance past it. For a non-BMP
147152
// scalar, use a transcoded offset first.
148153

149154
// TODO: If transcoded is 1, can we just skip ahead 4?
150155

151-
let idx = _utf16AlignNativeIndex(idx)
156+
idx = _utf16AlignNativeIndex(idx)
152157

153158
let len = _guts.fastUTF8ScalarLength(startingAt: idx._encodedOffset)
154159
if len == 4 && idx.transcodedOffset == 0 {
@@ -163,16 +168,20 @@ extension String.UTF16View: BidirectionalCollection {
163168

164169
@inlinable @inline(__always)
165170
public func index(before idx: Index) -> Index {
166-
_precondition(!idx.isZeroPosition)
171+
var idx = _guts.ensureMatchingEncoding(idx)
172+
_precondition(!idx.isZeroPosition && idx <= endIndex,
173+
"String index is out of bounds")
167174
if _slowPath(_guts.isForeign) { return _foreignIndex(before: idx) }
168-
if _guts.isASCII { return idx.priorEncoded._knownUTF8._knownUTF16 }
175+
if _guts.isASCII {
176+
return idx.priorEncoded._scalarAligned._knownUTF8._knownUTF16
177+
}
169178

170179
if idx.transcodedOffset != 0 {
171180
_internalInvariant(idx.transcodedOffset == 1)
172-
return idx.strippingTranscoding._knownUTF8
181+
return idx.strippingTranscoding._scalarAligned._knownUTF8
173182
}
174183

175-
let idx = _utf16AlignNativeIndex(idx)
184+
idx = _utf16AlignNativeIndex(idx)
176185
let len = _guts.fastUTF8ScalarLength(endingAt: idx._encodedOffset)
177186
if len == 4 {
178187
// 2 UTF-16 code units comprise this scalar; advance to the beginning and
@@ -186,6 +195,8 @@ extension String.UTF16View: BidirectionalCollection {
186195
}
187196

188197
public func index(_ i: Index, offsetBy n: Int) -> Index {
198+
let i = _guts.ensureMatchingEncoding(i)
199+
_precondition(i <= endIndex, "String index is out of bounds")
189200
if _slowPath(_guts.isForeign) {
190201
return _foreignIndex(i, offsetBy: n)
191202
}
@@ -198,6 +209,12 @@ extension String.UTF16View: BidirectionalCollection {
198209
public func index(
199210
_ i: Index, offsetBy n: Int, limitedBy limit: Index
200211
) -> Index? {
212+
let limit = _guts.ensureMatchingEncoding(limit)
213+
guard _fastPath(limit <= endIndex) else { return index(i, offsetBy: n) }
214+
215+
let i = _guts.ensureMatchingEncoding(i)
216+
_precondition(i <= endIndex, "String index is out of bounds")
217+
201218
if _slowPath(_guts.isForeign) {
202219
return _foreignIndex(i, offsetBy: n, limitedBy: limit)
203220
}
@@ -219,6 +236,14 @@ extension String.UTF16View: BidirectionalCollection {
219236
}
220237

221238
public func distance(from start: Index, to end: Index) -> Int {
239+
let start = _guts.ensureMatchingEncoding(start)
240+
let end = _guts.ensureMatchingEncoding(end)
241+
242+
_precondition(start._encodedOffset <= _guts.count,
243+
"String index is out of bounds")
244+
_precondition(end._encodedOffset <= _guts.count,
245+
"String index is out of bounds")
246+
222247
if _slowPath(_guts.isForeign) {
223248
return _foreignDistance(from: start, to: end)
224249
}
@@ -250,8 +275,14 @@ extension String.UTF16View: BidirectionalCollection {
250275
/// less than the view's end index.
251276
@inlinable @inline(__always)
252277
public subscript(idx: Index) -> UTF16.CodeUnit {
253-
String(_guts)._boundsCheck(idx)
278+
let idx = _guts.ensureMatchingEncoding(idx)
279+
_precondition(idx._encodedOffset < _guts.count,
280+
"String index is out of bounds")
281+
return self[_unchecked: idx]
282+
}
254283

284+
@_alwaysEmitIntoClient @inline(__always)
285+
internal subscript(_unchecked idx: Index) -> UTF16.CodeUnit {
255286
if _fastPath(_guts.isFastUTF8) {
256287
let scalar = _guts.fastUTF8Scalar(
257288
startingAt: _guts.scalarAlign(idx)._encodedOffset)
@@ -427,6 +458,7 @@ extension String.UTF16View {
427458
public typealias SubSequence = Substring.UTF16View
428459

429460
public subscript(r: Range<Index>) -> Substring.UTF16View {
461+
let r = _guts.validateSubscalarRange(r)
430462
return Substring.UTF16View(self, _bounds: r)
431463
}
432464
}
@@ -474,14 +506,20 @@ extension String.UTF16View {
474506
if n > 0 ? l >= 0 && l < n : l <= 0 && n < l {
475507
return nil
476508
}
477-
return i.strippingTranscoding.encoded(offsetBy: n)
509+
let offset = i._encodedOffset &+ n
510+
_precondition(offset >= 0 && offset <= _guts.count,
511+
"String index is out of bounds")
512+
return Index(_encodedOffset: offset)._knownUTF16
478513
}
479514

480515
@usableFromInline @inline(never)
481516
@_effects(releasenone)
482517
internal func _foreignIndex(_ i: Index, offsetBy n: Int) -> Index {
483518
_internalInvariant(_guts.isForeign)
484-
return i.strippingTranscoding.encoded(offsetBy: n)
519+
let offset = i._encodedOffset &+ n
520+
_precondition(offset >= 0 && offset <= _guts.count,
521+
"String index is out of bounds")
522+
return Index(_encodedOffset: offset)._knownUTF16
485523
}
486524

487525
@usableFromInline @inline(never)
@@ -631,10 +669,11 @@ extension String.UTF16View {
631669
return utf16Count
632670
}
633671
}
634-
672+
635673
@usableFromInline
636674
@_effects(releasenone)
637675
internal func _nativeGetOffset(for idx: Index) -> Int {
676+
_internalInvariant(idx._encodedOffset <= _guts.count)
638677
// Trivial and common: start
639678
if idx == startIndex { return 0 }
640679

@@ -656,13 +695,14 @@ extension String.UTF16View {
656695
// Otherwise, find the nearest lower-bound breadcrumb and count from there
657696
let (crumb, crumbOffset) = breadcrumbsPtr.pointee.getBreadcrumb(
658697
forIndex: idx)
659-
660698
return crumbOffset + _utf16Distance(from: crumb, to: idx)
661699
}
662700

663701
@usableFromInline
664702
@_effects(releasenone)
665703
internal func _nativeGetIndex(for offset: Int) -> Index {
704+
_precondition(offset >= 0, "String index is out of bounds")
705+
666706
// Trivial and common: start
667707
if offset == 0 { return startIndex }
668708

@@ -701,6 +741,7 @@ extension String.UTF16View {
701741
}
702742

703743
while true {
744+
_precondition(readIdx < readEnd, "String index is out of bounds")
704745
let len = _utf8ScalarLength(utf8[_unchecked: readIdx])
705746
let utf16Len = len == 4 ? 2 : 1
706747
utf16I &+= utf16Len

stdlib/public/core/Substring.swift

Lines changed: 33 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -886,11 +886,14 @@ extension Substring {
886886
/// Creates an instance that slices `base` at `_bounds`.
887887
@inlinable
888888
internal init(_ base: String.UTF16View, _bounds: Range<Index>) {
889-
// TODO(lorentey): Review index validation
890-
_slice = Slice(
891-
base: String(base._guts).utf16,
892-
bounds: _bounds)
889+
_slice = Slice(base: base, bounds: _bounds)
893890
}
891+
892+
@_alwaysEmitIntoClient @inline(__always)
893+
internal var _wholeGuts: _StringGuts { _slice._base._guts }
894+
895+
@_alwaysEmitIntoClient @inline(__always)
896+
internal var _base: String.UTF16View { _slice._base }
894897
}
895898
}
896899

@@ -900,85 +903,85 @@ extension Substring.UTF16View: BidirectionalCollection {
900903
public typealias Element = String.UTF16View.Element
901904
public typealias SubSequence = Substring.UTF16View
902905

903-
//
904-
// Plumb slice operations through
905-
//
906906
@inlinable
907-
public var startIndex: Index { return _slice.startIndex }
907+
public var startIndex: Index { _slice._startIndex }
908908

909909
@inlinable
910-
public var endIndex: Index { return _slice.endIndex }
910+
public var endIndex: Index { _slice._endIndex }
911911

912912
@inlinable
913913
public subscript(index: Index) -> Element {
914-
// TODO(lorentey): Review index validation
915-
return _slice[index]
914+
let index = _wholeGuts.ensureMatchingEncoding(index)
915+
_precondition(index >= startIndex && index < endIndex,
916+
"String index is out of bounds")
917+
return _base[_unchecked: index]
916918
}
917919

918920
@inlinable
919921
public var indices: Indices { return _slice.indices }
920922

921923
@inlinable
922924
public func index(after i: Index) -> Index {
923-
// TODO(lorentey): Review index validation
924-
return _slice.index(after: i)
925+
// Note: deferred bounds check
926+
return _base.index(after: i)
925927
}
926928

927929
@inlinable
928930
public func formIndex(after i: inout Index) {
929-
// TODO(lorentey): Review index validation
930-
_slice.formIndex(after: &i)
931+
// Note: deferred bounds check
932+
_base.formIndex(after: &i)
931933
}
932934

933935
@inlinable
934936
public func index(_ i: Index, offsetBy n: Int) -> Index {
935-
// TODO(lorentey): Review index validation
936-
return _slice.index(i, offsetBy: n)
937+
// Note: deferred bounds check
938+
return _base.index(i, offsetBy: n)
937939
}
938940

939941
@inlinable
940942
public func index(
941943
_ i: Index, offsetBy n: Int, limitedBy limit: Index
942944
) -> Index? {
943-
// TODO(lorentey): Review index validation
944-
return _slice.index(i, offsetBy: n, limitedBy: limit)
945+
// Note: deferred bounds check
946+
return _base.index(i, offsetBy: n, limitedBy: limit)
945947
}
946948

947949
@inlinable
948950
public func distance(from start: Index, to end: Index) -> Int {
949-
// TODO(lorentey): Review index validation
950-
return _slice.distance(from: start, to: end)
951+
return _base.distance(from: start, to: end)
951952
}
952953

953954
@inlinable
954955
public func _failEarlyRangeCheck(_ index: Index, bounds: Range<Index>) {
955-
// TODO(lorentey): Review index validation
956-
_slice._failEarlyRangeCheck(index, bounds: bounds)
956+
// FIXME: This probably ought to ensure that all three indices have matching
957+
// encodings.
958+
_base._failEarlyRangeCheck(index, bounds: bounds)
957959
}
958960

959961
@inlinable
960962
public func _failEarlyRangeCheck(
961963
_ range: Range<Index>, bounds: Range<Index>
962964
) {
963-
// TODO(lorentey): Review index validation
964-
_slice._failEarlyRangeCheck(range, bounds: bounds)
965+
// FIXME: This probably ought to ensure that all three indices have matching
966+
// encodings.
967+
_base._failEarlyRangeCheck(range, bounds: bounds)
965968
}
966969

967970
@inlinable
968971
public func index(before i: Index) -> Index {
969-
// TODO(lorentey): Review index validation
970-
return _slice.index(before: i)
972+
// Note: deferred bounds check
973+
return _base.index(before: i)
971974
}
972975

973976
@inlinable
974977
public func formIndex(before i: inout Index) {
975-
// TODO(lorentey): Review index validation
976-
_slice.formIndex(before: &i)
978+
// Note: deferred bounds check
979+
_base.formIndex(before: &i)
977980
}
978981

979982
@inlinable
980983
public subscript(r: Range<Index>) -> Substring.UTF16View {
981-
// TODO(lorentey): Review index validation
984+
let r = _wholeGuts.validateSubscalarRange(r, from: startIndex, to: endIndex)
982985
return Substring.UTF16View(_slice.base, _bounds: r)
983986
}
984987
}

0 commit comments

Comments
 (0)