Skip to content

Commit 96f6f5e

Browse files
authored
Merge pull request #82442 from glessard/rdar147500261-utf8span-32bit
2 parents c2d9d67 + db664fb commit 96f6f5e

File tree

7 files changed

+436
-123
lines changed

7 files changed

+436
-123
lines changed

stdlib/public/core/SmallString.swift

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,11 @@ internal struct _SmallString {
7979
extension _SmallString {
8080
@inlinable @inline(__always)
8181
internal static var capacity: Int {
82-
#if _pointerBitWidth(_32) || _pointerBitWidth(_16)
82+
#if _pointerBitWidth(_32) && os(watchOS)
8383
return 10
84+
#elseif _pointerBitWidth(_32) || _pointerBitWidth(_16)
85+
// Note: changed from 10 for contiguous storage.
86+
return 8
8487
#elseif os(Android) && arch(arm64)
8588
return 14
8689
#elseif _pointerBitWidth(_64)
@@ -90,6 +93,15 @@ extension _SmallString {
9093
#endif
9194
}
9295

96+
/// The largest small-string code-unit count that the `_span` getters in this
/// change treat as spannable; a small string whose count exceeds this value
/// makes `_span` return `nil`.
///
/// On 32-bit watchOS this is `capacity &- 2` (`capacity` is 10 on that
/// target, so 8); on every other target it is `capacity` unchanged.
// NOTE(review): presumably the last two of the ten bytes are not stored
// contiguously with the first eight on 32-bit watchOS — confirm against the
// small-string layout.
@_alwaysEmitIntoClient @inline(__always)
97+
internal static func contiguousCapacity() -> Int {
98+
#if _pointerBitWidth(_32) && os(watchOS)
99+
return capacity &- 2
100+
#else
101+
return capacity
102+
#endif
103+
}
104+
93105
// Get an integer equivalent to the _StringObject.discriminatedObjectRawBits
94106
// computed property.
95107
@inlinable @inline(__always)

stdlib/public/core/StringUTF8View.swift

Lines changed: 79 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,35 @@ extension String.UTF8View {
317317
}
318318
}
319319

320+
@available(SwiftStdlib 6.2, *)
320321
extension String.UTF8View {
321322

323+
/// Builds the `Span` over this view's UTF8 code units. Both the `span` and
/// `_span` getters forward to this helper.
///
/// Storage cases are handled in order:
/// 1. (Objective-C runtimes only) a bridged object that is not fast UTF8:
///    the span covers the associated storage obtained from
///    `_getOrAllocateAssociatedStorage()`.
/// 2. a small string: the span starts at the address of the borrowed `self`.
/// 3. anything else must be fast UTF8 (checked by `_precondition`): the span
///    covers the `fastUTF8` buffer.
///
/// In every case the result's lifetime is tied to the borrow of `self`.
@lifetime(borrow self)
324+
internal borrowing func _underlyingSpan() -> Span<UTF8.CodeUnit> {
325+
#if _runtime(_ObjC)
326+
// Handle non-UTF8 Objective-C bridging cases here: use the associated
// storage's start pointer and count as the span's bounds.
327+
if !_guts.isFastUTF8, _guts._object.hasObjCBridgeableObject {
328+
let storage = _guts._getOrAllocateAssociatedStorage()
329+
let (start, count) = unsafe (storage.start, storage.count)
330+
let span = unsafe Span(_unsafeStart: start, count: count)
331+
return unsafe _overrideLifetime(span, borrowing: self)
332+
}
333+
#endif // _runtime(_ObjC)
334+
let count = _guts.count
335+
// Small case: the span begins at the address of the borrowed `self` and
// covers `count` code units.
// NOTE(review): this presumably relies on the small string's code units
// being laid out contiguously from the start of `self` — confirm per target.
if _guts.isSmall {
336+
let a = Builtin.addressOfBorrow(self)
337+
let address = unsafe UnsafePointer<UTF8.CodeUnit>(a)
338+
let span = unsafe Span(_unsafeStart: address, count: count)
339+
return unsafe _overrideLifetime(span, borrowing: self)
340+
}
341+
// Remaining case: the guts are required to be fast UTF8.
let isFastUTF8 = _guts.isFastUTF8
342+
_precondition(isFastUTF8, "String must be contiguous UTF8")
343+
let buffer = unsafe _guts._object.fastUTF8
344+
let span = unsafe Span(_unsafeElements: buffer)
345+
return unsafe _overrideLifetime(span, borrowing: self)
346+
}
347+
348+
#if !(os(watchOS) && _pointerBitWidth(_32))
322349
/// A span over the UTF8 code units that make up this string.
323350
///
324351
/// - Note: In the case of bridged UTF16 String instances (on Apple
@@ -334,29 +361,61 @@ extension String.UTF8View {
334361
public var span: Span<UTF8.CodeUnit> {
335362
@lifetime(borrow self)
336363
borrowing get {
337-
#if _runtime(_ObjC)
338-
// handle non-UTF8 Objective-C bridging cases here
339-
if !_guts.isFastUTF8, _guts._object.hasObjCBridgeableObject {
340-
let storage = _guts._getOrAllocateAssociatedStorage()
341-
let (start, count) = unsafe (storage.start, storage.count)
342-
let span = unsafe Span(_unsafeStart: start, count: count)
343-
return unsafe _overrideLifetime(span, borrowing: self)
344-
}
345-
#endif
346-
let count = _guts.count
347-
if _guts.isSmall {
348-
let a = Builtin.addressOfBorrow(self)
349-
let address = unsafe UnsafePointer<UTF8.CodeUnit>(a)
350-
let span = unsafe Span(_unsafeStart: address, count: count)
351-
return unsafe _overrideLifetime(span, borrowing: self)
364+
_underlyingSpan()
365+
}
366+
}
367+
368+
/// A span over the UTF8 code units that make up this string.
369+
///
370+
/// - Note: In the case of bridged UTF16 String instances (on Apple
371+
/// platforms), this property transcodes the code units the first time
372+
/// it is called. The transcoded buffer is cached, and subsequent calls
373+
/// to `span` can reuse the buffer.
374+
///
375+
/// - Returns: A `Span` over the UTF8 code units of this String.
376+
///
377+
/// - Complexity: O(1) for native UTF8 Strings,
378+
/// amortized O(1) for bridged UTF16 Strings.
379+
@available(SwiftStdlib 6.2, *)
380+
public var _span: Span<UTF8.CodeUnit>? {
381+
@_alwaysEmitIntoClient @inline(__always)
382+
@lifetime(borrow self)
383+
borrowing get {
384+
span
385+
}
386+
}
387+
#else // !(os(watchOS) && _pointerBitWidth(_32))
388+
/// Stub for `span` on 32-bit watchOS (this is the `#else` branch of
/// `#if !(os(watchOS) && _pointerBitWidth(_32))`).
///
/// The declaration is marked unavailable on watchOS, and its getter stops
/// with `fatalError` if it is ever reached. Use `_span` on this target.
@available(watchOS, unavailable)
389+
public var span: Span<UTF8.CodeUnit> {
390+
@lifetime(borrow self)
391+
borrowing get {
392+
fatalError("\(#function) unavailable on 32-bit watchOS")
393+
}
394+
}
395+
396+
/// A span over the UTF8 code units that make up this string.
397+
///
398+
/// - Note: In the case of bridged UTF16 String instances (on Apple
399+
/// platforms), this property transcodes the code units the first time
400+
/// it is called. The transcoded buffer is cached, and subsequent calls
401+
/// to `span` can reuse the buffer.
402+
///
403+
/// - Returns: A `Span` over the UTF8 code units of this String, or `nil`
404+
/// if the String does not have a contiguous representation.
405+
///
406+
/// - Complexity: O(1) for native UTF8 Strings,
407+
/// amortized O(1) for bridged UTF16 Strings.
408+
@available(SwiftStdlib 6.2, *)
409+
public var _span: Span<UTF8.CodeUnit>? {
410+
@lifetime(borrow self)
411+
borrowing get {
412+
if _guts.isSmall, _guts.count > _SmallString.contiguousCapacity() {
413+
return nil
352414
}
353-
let isFastUTF8 = _guts.isFastUTF8
354-
_precondition(isFastUTF8, "String must be contiguous UTF8")
355-
let buffer = unsafe _guts._object.fastUTF8
356-
let span = unsafe Span(_unsafeElements: buffer)
357-
return unsafe _overrideLifetime(span, borrowing: self)
415+
return _underlyingSpan()
358416
}
359417
}
418+
#endif // !(os(watchOS) && _pointerBitWidth(_32))
360419
}
361420

362421
// Index conversions

stdlib/public/core/Substring.swift

Lines changed: 101 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -749,8 +749,38 @@ extension Substring.UTF8View: BidirectionalCollection {
749749
}
750750
}
751751

752+
@available(SwiftStdlib 6.2, *)
752753
extension Substring.UTF8View {
753754

755+
/// Builds the `Span` over the UTF8 code units of this substring view. Both
/// the `span` and `_span` getters forward to this helper.
///
/// Storage cases are handled in order:
/// 1. (Objective-C runtimes only) a bridged base that is not fast UTF8: the
///    base view's span is narrowed to this slice using `_foreignDistance`.
/// 2. a small base string: the span starts inside the borrowed `self`.
/// 3. anything else must be fast UTF8 (checked by `_precondition`): the
///    `fastUTF8` buffer is narrowed to the slice's encoded offsets.
///
/// In every case the result's lifetime is tied to the borrow of `self`.
@lifetime(borrow self)
756+
private borrowing func _underlyingSpan() -> Span<UTF8.CodeUnit> {
757+
#if _runtime(_ObjC)
758+
// Handle non-UTF8 Objective-C bridging cases here: measure the slice's
// start and length with `_foreignDistance`, then extract that range from
// the base view's span.
759+
if !_wholeGuts.isFastUTF8, _wholeGuts._object.hasObjCBridgeableObject {
760+
let base: String.UTF8View = self._base
761+
let first = base._foreignDistance(from: base.startIndex, to: startIndex)
762+
let count = base._foreignDistance(from: startIndex, to: endIndex)
763+
let span = base._underlyingSpan()._extracting(first..<(first &+ count))
764+
return unsafe _overrideLifetime(span, borrowing: self)
765+
}
766+
#endif // _runtime(_ObjC)
767+
// Encoded offsets of the slice bounds within the whole string.
let first = _slice._startIndex._encodedOffset
768+
let end = _slice._endIndex._encodedOffset
769+
if _wholeGuts.isSmall {
770+
let a = Builtin.addressOfBorrow(self)
771+
// Skip two `String.Index` strides from the start of `self`, then `first`
// code units into the string.
// NOTE(review): presumably the slice's two index bounds are stored ahead of
// the base string's small-string bytes — confirm against the layout.
let offset = first &+ (2 &* MemoryLayout<String.Index>.stride)
772+
let start = unsafe UnsafePointer<UTF8.CodeUnit>(a).advanced(by: offset)
773+
let span = unsafe Span(_unsafeStart: start, count: end &- first)
774+
return unsafe _overrideLifetime(span, borrowing: self)
775+
}
776+
// Remaining case: the whole string's guts are required to be fast UTF8.
let isFastUTF8 = _wholeGuts.isFastUTF8
777+
_precondition(isFastUTF8, "Substring must be contiguous UTF8")
778+
var span = unsafe Span(_unsafeElements: _wholeGuts._object.fastUTF8)
779+
span = span._extracting(first..<end)
780+
return unsafe _overrideLifetime(span, borrowing: self)
781+
}
782+
783+
#if !(os(watchOS) && _pointerBitWidth(_32))
754784
/// A span over the UTF8 code units that make up this substring.
755785
///
756786
/// - Note: In the case of bridged UTF16 String instances (on Apple
@@ -776,32 +806,80 @@ extension Substring.UTF8View {
776806
public var span: Span<UTF8.CodeUnit> {
777807
@lifetime(borrow self)
778808
borrowing get {
779-
#if _runtime(_ObjC)
780-
// handle non-UTF8 Objective-C bridging cases here
781-
if !_wholeGuts.isFastUTF8, _wholeGuts._object.hasObjCBridgeableObject {
782-
let base: String.UTF8View = self._base
783-
let first = base._foreignDistance(from: base.startIndex, to: startIndex)
784-
let count = base._foreignDistance(from: startIndex, to: endIndex)
785-
let span = base.span._extracting(first..<(first &+ count))
786-
return unsafe _overrideLifetime(span, borrowing: self)
787-
}
788-
#endif
789-
let first = _slice._startIndex._encodedOffset
790-
let end = _slice._endIndex._encodedOffset
791-
if _wholeGuts.isSmall {
792-
let a = Builtin.addressOfBorrow(self)
793-
let offset = first &+ (2 &* MemoryLayout<String.Index>.stride)
794-
let start = unsafe UnsafePointer<UTF8.CodeUnit>(a).advanced(by: offset)
795-
let span = unsafe Span(_unsafeStart: start, count: end &- first)
796-
return unsafe _overrideLifetime(span, borrowing: self)
809+
_underlyingSpan()
810+
}
811+
}
812+
813+
/// A span over the UTF8 code units that make up this substring.
814+
///
815+
/// - Note: In the case of bridged UTF16 String instances (on Apple
816+
/// platforms), this property needs to transcode the code units every time
817+
/// it is called.
818+
/// For example, if `string` has the bridged UTF16 representation,
819+
/// for word in string.split(separator: " ") {
820+
/// useSpan(word.span)
821+
/// }
822+
/// is accidentally quadratic because of this issue. A workaround is to
823+
/// explicitly convert the string into its native UTF8 representation:
824+
/// var nativeString = consume string
825+
/// nativeString.makeContiguousUTF8()
826+
/// for word in nativeString.split(separator: " ") {
827+
/// useSpan(word.span)
828+
/// }
829+
/// This second option has linear time complexity, as expected.
830+
///
831+
/// - Returns: A `Span` over the UTF8 code units of this Substring.
832+
///
833+
/// - Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings.
834+
@available(SwiftStdlib 6.2, *)
835+
public var _span: Span<UTF8.CodeUnit>? {
836+
@_alwaysEmitIntoClient @inline(__always)
837+
@lifetime(borrow self)
838+
borrowing get {
839+
span
840+
}
841+
}
842+
#else // !(os(watchOS) && _pointerBitWidth(_32))
843+
/// Stub for `span` on 32-bit watchOS (this is the `#else` branch of
/// `#if !(os(watchOS) && _pointerBitWidth(_32))`).
///
/// The declaration is marked unavailable on watchOS, and its getter stops
/// with `fatalError` if it is ever reached. Use `_span` on this target.
// NOTE(review): the matching `String.UTF8View.span` stub spells out
// `@lifetime(borrow self) borrowing get { ... }`; this one uses the implicit
// getter form — confirm the difference is intentional.
@available(watchOS, unavailable)
844+
public var span: Span<UTF8.CodeUnit> {
845+
fatalError("\(#function) unavailable on 32-bit watchOS")
846+
}
847+
848+
/// A span over the UTF8 code units that make up this substring.
849+
///
850+
/// - Note: In the case of bridged UTF16 String instances (on Apple
851+
/// platforms), this property needs to transcode the code units every time
852+
/// it is called.
853+
/// For example, if `string` has the bridged UTF16 representation,
854+
/// for word in string.split(separator: " ") {
855+
/// useSpan(word.span)
856+
/// }
857+
/// is accidentally quadratic because of this issue. A workaround is to
858+
/// explicitly convert the string into its native UTF8 representation:
859+
/// var nativeString = consume string
860+
/// nativeString.makeContiguousUTF8()
861+
/// for word in nativeString.split(separator: " ") {
862+
/// useSpan(word.span)
863+
/// }
864+
/// This second option has linear time complexity, as expected.
865+
///
866+
/// - Returns: A `Span` over the UTF8 code units of this Substring, or `nil`
867+
/// if the Substring does not have a contiguous representation.
868+
///
869+
/// - Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings.
870+
@available(SwiftStdlib 6.2, *)
871+
public var _span: Span<UTF8.CodeUnit>? {
872+
@lifetime(borrow self)
873+
borrowing get {
874+
if _wholeGuts.isSmall,
875+
_wholeGuts.count > _SmallString.contiguousCapacity() {
876+
// substring is spannable only when the whole string is spannable.
877+
return nil
797878
}
798-
let isFastUTF8 = _wholeGuts.isFastUTF8
799-
_precondition(isFastUTF8, "Substring must be contiguous UTF8")
800-
var span = unsafe Span(_unsafeElements: _wholeGuts._object.fastUTF8)
801-
span = span._extracting(first..<end)
802-
return unsafe _overrideLifetime(span, borrowing: self)
879+
return _underlyingSpan()
803880
}
804881
}
882+
#endif // !(os(watchOS) && _pointerBitWidth(_32))
805883
}
806884

807885
extension Substring {

0 commit comments

Comments
 (0)