Skip to content

Commit 7b78a1d

Browse files
authored
Avoid StringUTF16View dispatch overhead for some bridged String methods (swiftlang#83529)
This removes a bunch of overhead on the UTF16 paths in String, as well as consolidating the complicated bits of the logic in one file.
1 parent b250ef7 commit 7b78a1d

File tree

7 files changed

+104
-49
lines changed

7 files changed

+104
-49
lines changed

stdlib/public/core/StringStorage.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
internal protocol _AbstractStringStorage: _NSCopying {
1818
var asString: String { get }
19+
var utf16: String.UTF16View { get }
1920
var count: Int { get }
2021
var isASCII: Bool { get }
2122
var start: UnsafePointer<UInt8> { get }
@@ -26,6 +27,7 @@ internal protocol _AbstractStringStorage: _NSCopying {
2627

2728
internal protocol _AbstractStringStorage {
2829
var asString: String { get }
30+
var utf16: String.UTF16View { get }
2931
var count: Int { get }
3032
var isASCII: Bool { get }
3133
var start: UnsafePointer<UInt8> { get }
@@ -295,6 +297,10 @@ final internal class __StringStorage
295297
get { String(_StringGuts(self)) }
296298
}
297299

300+
// A UTF-16 view over this storage object's contents, built directly from
// `_StringGuts(self)`. This lets the bridging entry points reach UTF-16
// operations without first forming a `String` and then asking it for its
// `utf16` view.
@inline(__always)
301+
final internal var utf16: String.UTF16View {
302+
String.UTF16View(_StringGuts(self))
303+
}
298304

299305
private init(_doNotCallMe: ()) {
300306
_internalInvariantFailure("Use the create method")
@@ -721,6 +727,11 @@ final internal class __SharedStringStorage
721727
return String(_StringGuts(self))
722728
}
723729
}
730+
731+
// A UTF-16 view over this shared storage object's contents, built directly
// from `_StringGuts(self)`. This lets the bridging entry points reach UTF-16
// operations without first forming a `String` and then asking it for its
// `utf16` view.
@inline(__always)
732+
final internal var utf16: String.UTF16View {
733+
String.UTF16View(_StringGuts(self))
734+
}
724735

725736
internal init(
726737
_mortal ptr: UnsafePointer<UInt8>,

stdlib/public/core/StringStorageBridge.swift

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,25 +43,17 @@ extension String {
4343

4444
// ObjC interfaces.
4545
extension _AbstractStringStorage {
46+
4647
@inline(__always)
4748
@_effects(releasenone)
4849
internal func _getCharacters(
4950
_ buffer: UnsafeMutablePointer<UInt16>, _ aRange: _SwiftNSRange
5051
) {
51-
_precondition(aRange.location >= 0 && aRange.length >= 0,
52-
"Range out of bounds")
53-
// Note: `count` is counting UTF-8 code units, while `aRange` is measured in
54-
// UTF-16 offsets. This precondition is a necessary, but not sufficient test
55-
// for validity. (More precise checks are done in UTF16View._nativeCopy.)
56-
_precondition(aRange.location + aRange.length <= Int(count),
57-
"Range out of bounds")
58-
5952
let range = unsafe Range(
6053
_uncheckedBounds: (aRange.location, aRange.location+aRange.length))
61-
let str = asString
62-
unsafe str._copyUTF16CodeUnits(
54+
unsafe utf16._nativeCopy(
6355
into: UnsafeMutableBufferPointer(start: buffer, count: range.count),
64-
range: range)
56+
offsetRange: range)
6557
}
6658

6759
@inline(__always)
@@ -116,6 +108,26 @@ extension _AbstractStringStorage {
116108
return _cocoaLengthOfBytesInEncodingTrampoline(self, encoding)
117109
}
118110
}
111+
112+
// Shared trap for an out-of-range offset passed to `_character(at:)`.
// The caller info isn't useful here anyway because it's never client code,
113+
// so keeping this out of line (`@inline(never)`) makes sure that the
// always-inlined `_character(at:)` doesn't have inlined assertion bits.
114+
@inline(never)
115+
internal func _characterAtIndexOutOfBounds() -> Never {
116+
// Never returns: unconditionally fails with the out-of-bounds message.
_preconditionFailure("String index is out of bounds")
117+
}
118+
119+
// Returns the UTF-16 code unit at `offset` for this storage object.
//
// - Parameter offset: The requested position. On the ASCII path it is used
//   directly as a byte offset from `start`; on the non-ASCII path it is handed
//   to `utf16[nativeNonASCIIOffset:]` for translation.
// - Returns: The code unit as a `UInt16`.
//
// An out-of-range offset on the ASCII path traps through
// `_characterAtIndexOutOfBounds()`; on the non-ASCII path bounds checking is
// left to the `nativeNonASCIIOffset` subscript.
@inline(__always)
120+
@_effects(readonly)
121+
internal func _character(at offset: Int) -> UInt16 {
122+
if _fastPath(isASCII) {
123+
// Both bounds are checked here, so the raw pointer read below only happens
// for 0 <= offset < count.
if (_fastPath(offset < count && offset >= 0)) {
124+
// Read the byte at `start + offset` and widen it to a UTF-16 code unit.
return unsafe UInt16((start + offset).pointee)
125+
}
126+
// Out-of-line failure path; returns `Never`, so no value is needed here.
_characterAtIndexOutOfBounds()
127+
} else {
128+
// Non-ASCII contents: let the UTF-16 view map the offset to a code unit.
return utf16[nativeNonASCIIOffset: offset]
129+
}
130+
}
119131

120132
@_effects(readonly)
121133
internal func _nativeIsEqual<T:_AbstractStringStorage>(
@@ -176,7 +188,7 @@ extension _AbstractStringStorage {
176188
start: utf16Ptr,
177189
count: otherUTF16Length
178190
)
179-
return unsafe asString.utf16.elementsEqual(utf16Buffer) ? 1 : 0
191+
return unsafe utf16.elementsEqual(utf16Buffer) ? 1 : 0
180192
}
181193

182194
/*
@@ -197,7 +209,7 @@ extension __StringStorage {
197209
if isASCII {
198210
return count
199211
}
200-
return asString.utf16.count
212+
return utf16.count
201213
}
202214
}
203215

@@ -214,8 +226,7 @@ extension __StringStorage {
214226
@objc(characterAtIndex:)
215227
@_effects(readonly)
216228
final internal func character(at offset: Int) -> UInt16 {
217-
let str = asString
218-
return str.utf16[str._toUTF16Index(offset)]
229+
_character(at: offset)
219230
}
220231

221232
@objc(getCharacters:range:)
@@ -313,7 +324,7 @@ extension __SharedStringStorage {
313324
if isASCII {
314325
return count
315326
}
316-
return asString.utf16.count
327+
return utf16.count
317328
}
318329
}
319330

@@ -330,8 +341,7 @@ extension __SharedStringStorage {
330341
@objc(characterAtIndex:)
331342
@_effects(readonly)
332343
final internal func character(at offset: Int) -> UInt16 {
333-
let str = asString
334-
return str.utf16[str._toUTF16Index(offset)]
344+
_character(at: offset)
335345
}
336346

337347
@objc(getCharacters:range:)

stdlib/public/core/StringUTF16View.swift

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,22 @@ extension String.UTF16View: BidirectionalCollection {
435435

436436
return _foreignSubscript(position: idx)
437437
}
438+
439+
// Returns the UTF-16 code unit at the given integer offset into this view.
//
// Used by `_AbstractStringStorage._character(at:)` on its non-ASCII path.
// The offset is first converted to an index, the index is bounds-checked
// against `_guts.count`, and the scalar containing it is decoded so the
// requested code unit can be picked out of the scalar's UTF-16 encoding.
//
// Traps with "String index is out of bounds" if the resulting index's encoded
// offset is not below `_guts.count`.
//
// NOTE(review): a negative `offset` takes the `offset < threshold` branch and
// reaches `_index(startIndex, offsetBy:)` with no explicit `>= 0` check in
// this getter -- confirm that call rejects negative distances from
// `startIndex`.
internal subscript(nativeNonASCIIOffset offset: Int) -> UTF16.CodeUnit {
440+
@_effects(releasenone) get {
441+
// Offsets below half a breadcrumb stride are resolved by walking from
// `startIndex`; larger ones go through `_nativeGetIndex(for:)`.
let threshold = _breadcrumbStride / 2
442+
// Do not use breadcrumbs if directly computing the result is expected
443+
// to be cheaper
444+
let idx = offset < threshold ?
445+
_index(startIndex, offsetBy: offset)._knownUTF8 :
446+
_nativeGetIndex(for: offset)
447+
// Strict `<`: an index at `_guts.count` has no code unit to read.
_precondition(idx._encodedOffset < _guts.count,
448+
"String index is out of bounds")
449+
// Decode the scalar starting at the scalar-aligned position of `idx`.
let scalar = _guts.fastUTF8Scalar(
450+
startingAt: _guts.scalarAlign(idx)._encodedOffset)
451+
// `transcodedOffset` selects which UTF-16 code unit of that scalar to return.
return scalar.utf16[idx.transcodedOffset]
452+
}
453+
}
438454
}
439455

440456
extension String.UTF16View {
@@ -948,6 +964,21 @@ extension String.UTF16View {
948964
fatalError()
949965
}
950966
}
967+
968+
// See _nativeCopy(into:alignedRange:), except this uses un-verified UTF16
969+
// offsets instead of aligned indexes
//
// The offset range is converted to an index range with
// `_indexRange(for:from:)`, both resulting bounds are checked against
// `_guts.count`, and the copy is then delegated to the aligned-range overload.
//
// - Parameters:
//   - buffer: Destination for the UTF-16 code units.
//   - range: Integer offsets into this view, not yet validated by the caller.
//
// Traps with "String index is out of bounds" if either converted bound has an
// encoded offset greater than `_guts.count`.
//
// NOTE(review): nothing in this function rejects a negative or inverted
// `range`; presumably `_indexRange(for:from:)` or the caller does -- confirm.
970+
internal func _nativeCopy(
971+
into buffer: UnsafeMutableBufferPointer<UInt16>,
972+
offsetRange range: Range<Int>
973+
) {
974+
let alignedRange = _indexRange(for: range, from: startIndex)
975+
// `<=` rather than `<`: a bound equal to `_guts.count` is accepted, which
// permits ranges that end at the end of the contents.
_precondition(alignedRange.lowerBound._encodedOffset <= _guts.count &&
976+
alignedRange.upperBound._encodedOffset <= _guts.count,
977+
"String index is out of bounds")
978+
unsafe _nativeCopy(
979+
into: buffer,
980+
alignedRange: alignedRange.lowerBound ..< alignedRange.upperBound)
981+
}
951982

952983
// Copy (i.e. transcode to UTF-16) our contents into a buffer. `alignedRange`
953984
// means that the indices are part of the UTF16View.indices -- they are either
@@ -962,16 +993,16 @@ extension String.UTF16View {
962993
range.lowerBound == _utf16AlignNativeIndex(range.lowerBound))
963994
_internalInvariant(
964995
range.upperBound == _utf16AlignNativeIndex(range.upperBound))
965-
996+
966997
if _slowPath(range.isEmpty) { return }
967-
998+
968999
let isASCII = _guts.isASCII
9691000
return unsafe _guts.withFastUTF8 { utf8 in
9701001
var writeIdx = 0
9711002
let writeEnd = buffer.count
9721003
var readIdx = range.lowerBound._encodedOffset
9731004
let readEnd = range.upperBound._encodedOffset
974-
1005+
9751006
if isASCII {
9761007
_internalInvariant(range.lowerBound.transcodedOffset == 0)
9771008
_internalInvariant(range.upperBound.transcodedOffset == 0)
@@ -984,7 +1015,7 @@ extension String.UTF16View {
9841015
}
9851016
return
9861017
}
987-
1018+
9881019
// Handle mid-transcoded-scalar initial index
9891020
if _slowPath(range.lowerBound.transcodedOffset != 0) {
9901021
_internalInvariant(range.lowerBound.transcodedOffset == 1)
@@ -995,7 +1026,7 @@ extension String.UTF16View {
9951026
readIdx &+= len
9961027
writeIdx &+= 1
9971028
}
998-
1029+
9991030
// Transcode middle
10001031
while readIdx < readEnd {
10011032
let (scalar, len) = unsafe _decodeScalar(utf8, startingAt: readIdx)
@@ -1009,13 +1040,13 @@ extension String.UTF16View {
10091040
writeIdx &+= 1
10101041
}
10111042
}
1012-
1043+
10131044
// Handle mid-transcoded-scalar final index
10141045
if _slowPath(range.upperBound.transcodedOffset == 1) {
10151046
_internalInvariant(writeIdx < writeEnd)
10161047
let (scalar, _) = unsafe _decodeScalar(utf8, startingAt: readIdx)
10171048
_internalInvariant(scalar.utf16.count == 2)
1018-
1049+
10191050
// Note: this is intentionally not using the _unchecked subscript.
10201051
// (We rely on debug assertions to catch out of bounds access.)
10211052
unsafe buffer[writeIdx] = scalar.utf16[0]

stdlib/public/core/UnicodeHelpers.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ internal func _decodeUTF8(
6161
return Unicode.Scalar(_unchecked: value)
6262
}
6363

64-
@inlinable
64+
@inlinable @inline(__always)
6565
internal func _decodeScalar(
6666
_ utf8: UnsafeBufferPointer<UInt8>, startingAt i: Int
6767
) -> (Unicode.Scalar, scalarLength: Int) {
@@ -207,7 +207,7 @@ extension _StringGuts {
207207
}
208208
}
209209

210-
@inlinable
210+
@inlinable @inline(__always)
211211
internal func fastUTF8Scalar(startingAt i: Int) -> Unicode.Scalar {
212212
_internalInvariant(isFastUTF8)
213213
return unsafe self.withFastUTF8 { unsafe _decodeScalar($0, startingAt: i).0 }

stdlib/public/core/UnsafeBufferPointer.swift.gyb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ extension Unsafe${Mutable}BufferPointer: @unsafe Sequence {
212212
return (unsafe Iterator(_position: s + n, _end: s + count), n)
213213
}
214214

215-
@inlinable
215+
@inlinable @_transparent
216216
@safe
217217
public func withContiguousStorageIfAvailable<R>(
218218
_ body: (UnsafeBufferPointer<Element>) throws -> R
@@ -780,7 +780,7 @@ extension Unsafe${Mutable}BufferPointer:
780780
return try unsafe body(&self)
781781
}
782782

783-
@inlinable
783+
@inlinable @_transparent
784784
@safe
785785
public mutating func withContiguousMutableStorageIfAvailable<R>(
786786
_ body: (inout UnsafeMutableBufferPointer<Element>) throws -> R
@@ -825,7 +825,7 @@ extension Unsafe${Mutable}BufferPointer {
825825
/// - `rebased.count == slice.count`
826826
///
827827
/// - Parameter slice: The buffer slice to rebase.
828-
@inlinable // unsafe-performance
828+
@inlinable @_transparent // unsafe-performance
829829
public init(rebasing slice: Slice<UnsafeBufferPointer<Element>>) {
830830
// NOTE: `Slice` does not guarantee that its start/end indices are valid
831831
// in `base` -- it merely ensures that `startIndex <= endIndex`.
@@ -864,7 +864,7 @@ extension Unsafe${Mutable}BufferPointer {
864864
/// - `rebased.count == slice.count`
865865
///
866866
/// - Parameter slice: The buffer slice to rebase.
867-
@inlinable // unsafe-performance
867+
@inlinable @_transparent // unsafe-performance
868868
public init(rebasing slice: Slice<UnsafeMutableBufferPointer<Element>>) {
869869
let base = unsafe slice.base.baseAddress?.advanced(by: slice.startIndex)
870870
let count = unsafe slice.endIndex &- slice.startIndex
@@ -993,7 +993,7 @@ extension UnsafeMutableBufferPointer {
993993
/// initialize the buffer's storage.
994994
/// - Returns: The index one past the last element of the buffer initialized
995995
/// by this function.
996-
@_alwaysEmitIntoClient
996+
@_alwaysEmitIntoClient @_transparent
997997
public func initialize(
998998
fromContentsOf source: some Collection<Element>
999999
) -> Index {
@@ -1418,7 +1418,7 @@ extension Unsafe${Mutable}BufferPointer where Element: ~Copyable {
14181418
/// method.
14191419
/// - buffer: The buffer temporarily bound to `T`.
14201420
/// - Returns: The return value, if any, of the `body` closure parameter.
1421-
@_alwaysEmitIntoClient
1421+
@_alwaysEmitIntoClient @_transparent
14221422
public func withMemoryRebound<T: ~Copyable, E: Error, Result: ~Copyable>(
14231423
to type: T.Type,
14241424
_ body: (_ buffer: ${Self}<T>) throws(E) -> Result

0 commit comments

Comments
 (0)