Skip to content

Commit e0bd5f7

Browse files
committed
[stdlib] Fix Substring.UnicodeScalarView.replaceSubrange
1 parent 2e9fd9e commit e0bd5f7

File tree

2 files changed

+190
-82
lines changed

2 files changed

+190
-82
lines changed

stdlib/public/core/StringGutsRangeReplaceable.swift

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,60 @@ extension _StringGuts {
342342
return Range(_uncheckedBounds: (i, j))
343343
}
344344

345+
// - Returns: The encoded offset range of the replaced contents in the result.
346+
@discardableResult
347+
internal mutating func replaceSubrange<C>(
348+
_ bounds: Range<Index>,
349+
with newElements: C
350+
) -> Range<Int>
351+
where C: Collection, C.Iterator.Element == UnicodeScalar {
352+
if isUniqueNative {
353+
if let repl = newElements as? String.UnicodeScalarView {
354+
if repl._guts.isFastUTF8 {
355+
return repl._guts.withFastUTF8 {
356+
uniqueNativeReplaceSubrange(
357+
bounds, with: $0, isASCII: repl._guts.isASCII)
358+
}
359+
}
360+
} else if let repl = newElements as? Substring.UnicodeScalarView {
361+
if repl._wholeGuts.isFastUTF8 {
362+
return repl._wholeGuts.withFastUTF8(range: repl._offsetRange) {
363+
uniqueNativeReplaceSubrange(
364+
bounds, with: $0, isASCII: repl._wholeGuts.isASCII)
365+
}
366+
}
367+
}
368+
if #available(SwiftStdlib 5.1, *) {
369+
return uniqueNativeReplaceSubrange(
370+
bounds, with: newElements.lazy.flatMap { $0.utf8 })
371+
} else {
372+
// FIXME: The stdlib should not have a deployment target this ancient.
373+
let c = newElements.reduce(0) { $0 + UTF8.width($1) }
374+
var utf8: [UInt8] = []
375+
utf8.reserveCapacity(c)
376+
utf8 = newElements.reduce(into: utf8) { utf8, next in
377+
next.withUTF8CodeUnits { utf8.append(contentsOf: $0) }
378+
}
379+
return uniqueNativeReplaceSubrange(bounds, with: utf8)
380+
}
381+
}
382+
383+
var result = String.UnicodeScalarView()
384+
// FIXME: It should be okay to get rid of excess capacity
385+
// here. rdar://problem/45635432
386+
if let capacity = self.nativeCapacity {
387+
result.reserveCapacity(capacity)
388+
}
389+
let selfStr = String.UnicodeScalarView(self)
390+
result.append(contentsOf: selfStr[..<bounds.lowerBound])
391+
let i = result._guts.count
392+
result.append(contentsOf: newElements)
393+
let j = result._guts.count
394+
result.append(contentsOf: selfStr[bounds.upperBound...])
395+
self = result._guts
396+
return Range(_uncheckedBounds: (i, j))
397+
}
398+
345399
// - Returns: The encoded offset range of the replaced contents in the result.
346400
internal mutating func uniqueNativeReplaceSubrange(
347401
_ bounds: Range<Index>,
@@ -386,5 +440,116 @@ extension _StringGuts {
386440
self = _StringGuts(_object.nativeStorage)
387441
return Range(_uncheckedBounds: (start, start + replCount))
388442
}
443+
444+
/// Run `body` to mutate the given `subrange` of this string within
445+
/// `startIndex ..< endIndex`, then update `startIndex` and `endIndex` to be
446+
/// valid positions in the resulting string, addressing the same (logical)
447+
/// locations as in the original string.
448+
///
449+
/// This is used by both `Substring` and `Substring.UnicodeScalarView` to
450+
/// implement their `replaceSubrange` methods.
451+
///
452+
/// - Parameter subrange: A scalar-aligned offset range in this string.
453+
/// - Parameter startIndex: The start index of the substring that performs
454+
/// this operation.
455+
/// - Parameter endIndex: The end index of the substring that performs this
456+
/// operations.
457+
/// - Parameter body: The mutation operation to execute on `self`. The
458+
/// returned offset range must correspond to `subrange` in the resulting
459+
/// string.
460+
internal mutating func mutateSubrangeInSubstring(
461+
subrange: Range<Index>,
462+
startIndex: inout Index,
463+
endIndex: inout Index,
464+
with body: (inout _StringGuts) -> Range<Int>
465+
) {
466+
_internalInvariant(
467+
subrange.lowerBound >= startIndex && subrange.upperBound <= endIndex)
468+
469+
if _slowPath(isKnownUTF16) {
470+
// UTF-16 (i.e., foreign) string. The mutation will convert this to the
471+
// native UTF-8 encoding, so we need to do some extra work to preserve our
472+
// bounds.
473+
let utf8StartOffset = String(self).utf8.distance(
474+
from: self.startIndex, to: startIndex)
475+
let oldUTF8Count = String(self).utf8.distance(
476+
from: startIndex, to: endIndex)
477+
478+
let oldUTF8SubrangeCount = String(self).utf8.distance(
479+
from: subrange.lowerBound, to: subrange.upperBound)
480+
481+
let newUTF8Subrange = body(&self)
482+
_internalInvariant(!isKnownUTF16)
483+
484+
let newUTF8Count =
485+
oldUTF8Count + newUTF8Subrange.count - oldUTF8SubrangeCount
486+
487+
// Get the character stride in the entire string, not just the substring.
488+
// (Characters in a substring may end beyond the bounds of it.)
489+
let newStride = _opaqueCharacterStride(
490+
startingAt: utf8StartOffset, in: utf8StartOffset ..< count)
491+
492+
startIndex = String.Index(
493+
encodedOffset: utf8StartOffset,
494+
transcodedOffset: 0,
495+
characterStride: newStride)._scalarAligned._knownUTF8
496+
if isOnGraphemeClusterBoundary(startIndex) {
497+
startIndex = startIndex._characterAligned
498+
}
499+
500+
endIndex = String.Index(
501+
encodedOffset: utf8StartOffset + newUTF8Count,
502+
transcodedOffset: 0)._scalarAligned._knownUTF8
503+
return
504+
}
505+
506+
// UTF-8 string.
507+
508+
let oldRange = subrange._encodedOffsetRange
509+
let newRange = body(&self)
510+
511+
let oldBounds = Range(
512+
_uncheckedBounds: (startIndex._encodedOffset, endIndex._encodedOffset))
513+
let newBounds = Range(_uncheckedBounds: (
514+
oldBounds.lowerBound,
515+
oldBounds.upperBound &+ newRange.count &- oldRange.count))
516+
517+
// Update `startIndex` if necessary. The replacement may have invalidated
518+
// its cached character stride and character alignment flag, but not its
519+
// stored offset, encoding, or scalar alignment.
520+
//
521+
// We are exploiting the fact that mutating the string _after_ the scalar
522+
// following the end of the character at `startIndex` cannot possibly change
523+
// the length of that character. (This is true because `index(after:)` never
524+
// needs to look ahead by more than one Unicode scalar.)
525+
let oldStride = startIndex.characterStride ?? 0
526+
if oldRange.lowerBound <= oldBounds.lowerBound &+ oldStride {
527+
// Get the character stride in the entire string, not just the substring.
528+
// (Characters in a substring may end beyond the bounds of it.)
529+
let newStride = _opaqueCharacterStride(
530+
startingAt: newBounds.lowerBound,
531+
in: newBounds.lowerBound ..< self.count)
532+
var newStart = String.Index(
533+
encodedOffset: newBounds.lowerBound,
534+
characterStride: newStride
535+
)._scalarAligned._knownUTF8
536+
537+
// Preserve character alignment flag if possible
538+
if startIndex._isCharacterAligned,
539+
(oldRange.lowerBound > oldBounds.lowerBound ||
540+
isOnGraphemeClusterBoundary(newStart)) {
541+
newStart = newStart._characterAligned
542+
}
543+
544+
startIndex = newStart
545+
}
546+
547+
// Update `endIndex`.
548+
if newBounds.upperBound != endIndex._encodedOffset {
549+
endIndex = Index(
550+
_encodedOffset: newBounds.upperBound
551+
)._scalarAligned._knownUTF8
552+
}
553+
}
389554
}
390555

stdlib/public/core/Substring.swift

Lines changed: 25 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,6 @@ extension Substring: StringProtocol {
470470
// Note: SE-0180 requires us to use `subrange` bounds even if they aren't
471471
// `Character` aligned. (We still have to round things down to the nearest
472472
// scalar boundary, though, or we may generate ill-formed encodings.)
473-
defer { _invariantCheck() }
474473
let subrange = _validateScalarRange(subrange)
475474

476475
// Replacing the range is easy -- we can just reuse `String`'s
@@ -488,85 +487,14 @@ extension Substring: StringProtocol {
488487
// merged with the Character preceding/following the replaced range.
489488
//
490489
// The best way to avoid problems in these cases is to lower index
491-
// calculations to Unicode scalars (or below). In this implementation, we
492-
// are measuring things in UTF-8 code units, for efficiency.
493-
494-
if _slowPath(_wholeGuts.isKnownUTF16) {
495-
// UTF-16 (i.e., foreign) string. The mutation will convert this to the
496-
// native UTF-8 encoding, so we need to do some extra work to preserve our
497-
// bounds.
498-
let utf8StartOffset = _slice._base.utf8.distance(
499-
from: _slice._base.startIndex,
500-
to: _slice._startIndex)
501-
let oldUTF8Count = self.utf8.count
502-
503-
let oldSubrangeCount = self.utf8.distance(
504-
from: subrange.lowerBound, to: subrange.upperBound)
505-
506-
let newUTF8Subrange = _slice._base._guts.replaceSubrange(
507-
subrange, with: newElements)
508-
_internalInvariant(!_wholeGuts.isKnownUTF16)
509-
510-
let newUTF8Count = oldUTF8Count + newUTF8Subrange.count - oldSubrangeCount
511-
512-
// Get the character stride in the entire string, not just the substring.
513-
// (Characters in a substring may end beyond the bounds of it.)
514-
let newStride = _wholeGuts._opaqueCharacterStride(
515-
startingAt: utf8StartOffset,
516-
in: utf8StartOffset ..< _wholeGuts.count)
517-
518-
_slice._startIndex = String.Index(
519-
encodedOffset: utf8StartOffset,
520-
transcodedOffset: 0,
521-
characterStride: newStride)._scalarAligned._knownUTF8
522-
_slice._endIndex = String.Index(
523-
encodedOffset: utf8StartOffset + newUTF8Count,
524-
transcodedOffset: 0)._scalarAligned._knownUTF8
525-
return
526-
}
527-
528-
// UTF-8 string.
529-
530-
let oldRange = Range(_uncheckedBounds: (
531-
subrange.lowerBound._encodedOffset, subrange.upperBound._encodedOffset))
532-
533-
let newRange = _slice._base._guts.replaceSubrange(
534-
subrange, with: newElements)
535-
536-
let newOffsetBounds = Range(_uncheckedBounds: (
537-
startIndex._encodedOffset,
538-
endIndex._encodedOffset &+ newRange.count &- oldRange.count))
539-
540-
// Update `startIndex` if necessary. The replacement may have invalidated
541-
// its cached character stride, but not its stored offset.
542-
//
543-
// We are exploiting the fact that mutating the string _after_ the scalar
544-
// following the end of the character at `startIndex` cannot possibly change
545-
// the length of that character. (This is true because `index(after:)` never
546-
// needs to look ahead by more than one Unicode scalar.)
547-
if
548-
let stride = startIndex.characterStride,
549-
oldRange.lowerBound <= startIndex._encodedOffset &+ stride
550-
{
551-
// Get the character stride in the entire string, not just the substring.
552-
// (Characters in a substring may end beyond the bounds of it.)
553-
let newStride = _wholeGuts._opaqueCharacterStride(
554-
startingAt: newOffsetBounds.lowerBound,
555-
in: newOffsetBounds.lowerBound ..< _wholeGuts.count)
556-
_slice._startIndex = String.Index(
557-
encodedOffset: startIndex._encodedOffset,
558-
characterStride: newStride
559-
)._scalarAligned._knownUTF8
560-
}
561-
562-
// Update endIndex.
563-
if newOffsetBounds.upperBound != endIndex._encodedOffset {
564-
_slice._endIndex = Index(
565-
_encodedOffset: newOffsetBounds.upperBound
566-
)._scalarAligned._knownUTF8
567-
}
490+
// calculations to Unicode scalars (or below).
491+
_slice._base._guts.mutateSubrangeInSubstring(
492+
subrange: subrange,
493+
startIndex: &_slice._startIndex,
494+
endIndex: &_slice._endIndex,
495+
with: { $0.replaceSubrange(subrange, with: newElements) })
568496

569-
// TODO(lorentey): Mark new bounds character aligned if possible
497+
_invariantCheck()
570498
}
571499

572500
/// Creates a string from the given Unicode code units in the specified
@@ -1209,17 +1137,32 @@ extension String {
12091137
}
12101138
}
12111139

1212-
// FIXME: The other String views should be RangeReplaceable too.
12131140
extension Substring.UnicodeScalarView: RangeReplaceableCollection {
12141141
@inlinable
12151142
public init() { _slice = Slice.init() }
12161143

12171144
public mutating func replaceSubrange<C: Collection>(
12181145
_ subrange: Range<Index>, with replacement: C
12191146
) where C.Element == Element {
1220-
// TODO(lorentey): Don't forward to slice
12211147
let subrange = _wholeGuts.validateScalarRange(subrange, in: _bounds)
1222-
_slice.replaceSubrange(subrange, with: replacement)
1148+
1149+
// Replacing the range is easy -- we can just reuse `String`'s
1150+
// implementation. However, we must also update `startIndex` and `endIndex`
1151+
// to keep them valid & pointing to the same positions, which is somewhat
1152+
// tricky.
1153+
//
1154+
// In Swift <=5.6, this used to forward to `Slice.replaceSubrange`, which
1155+
// (incorrectly) assumes that indices before the replaced subrange are
1156+
// preserved after the mutation. (This isn't true for strings, esp. when the
1157+
// original value is UTF-16 encoded.)
1158+
1159+
_slice._base._guts.mutateSubrangeInSubstring(
1160+
subrange: subrange,
1161+
startIndex: &_slice._startIndex,
1162+
endIndex: &_slice._endIndex,
1163+
with: { $0.replaceSubrange(subrange, with: replacement) })
1164+
1165+
_invariantCheck()
12231166
}
12241167
}
12251168

0 commit comments

Comments
 (0)