@@ -435,6 +435,22 @@ extension String.UTF16View: BidirectionalCollection {
435
435
436
436
return _foreignSubscript ( position: idx)
437
437
}
438
+
439
/// Returns the UTF-16 code unit located `offset` UTF-16 code units past
/// `startIndex`.
///
/// The offset is first turned into an index, the scalar containing that
/// index is decoded from the UTF-8 storage, and the requested code unit is
/// read out of that scalar's UTF-16 encoding.
///
/// Traps if the resolved index's encoded offset is not strictly below
/// `_guts.count`.
///
/// NOTE(review): the argument label suggests this is meant only for native,
/// non-ASCII strings; nothing in this accessor checks that -- confirm at the
/// call sites.
internal subscript(nativeNonASCIIOffset offset: Int) -> UTF16.CodeUnit {
  @_effects(releasenone) get {
    // Below half a breadcrumb stride, walking forward from `startIndex` is
    // expected to be cheaper than going through the breadcrumbs.
    let cutoff = _breadcrumbStride / 2
    let position = if offset < cutoff {
      _index(startIndex, offsetBy: offset)._knownUTF8
    } else {
      _nativeGetIndex(for: offset)
    }
    _precondition(
      position._encodedOffset < _guts.count,
      " String index is out of bounds ")
    // `position` may point into the middle of a scalar's UTF-16 encoding:
    // decode from the scalar-aligned offset, then use `transcodedOffset` to
    // select the code unit within that scalar.
    let scalarStart = _guts.scalarAlign(position)._encodedOffset
    let decoded = _guts.fastUTF8Scalar(startingAt: scalarStart)
    return decoded.utf16[position.transcodedOffset]
  }
}
438
454
}
439
455
440
456
extension String . UTF16View {
@@ -948,6 +964,21 @@ extension String.UTF16View {
948
964
fatalError ( )
949
965
}
950
966
}
967
+
968
// Variant of _nativeCopy(into:alignedRange:) that accepts a range of
// un-verified UTF16 offsets rather than already-aligned indexes.
//
// The offsets are converted to an index range starting from `startIndex`,
// both ends are checked against `_guts.count` (trapping if either one lies
// beyond it), and the copy itself is forwarded to
// _nativeCopy(into:alignedRange:).
internal func _nativeCopy(
  into buffer: UnsafeMutableBufferPointer<UInt16>,
  offsetRange range: Range<Int>
) {
  let bounds = _indexRange(for: range, from: startIndex)
  let lower = bounds.lowerBound
  let upper = bounds.upperBound
  // Either end may equal `_guts.count` (one past the last code unit), but
  // neither may exceed it.
  _precondition(
    lower._encodedOffset <= _guts.count
      && upper._encodedOffset <= _guts.count,
    " String index is out of bounds ")
  unsafe _nativeCopy(into: buffer, alignedRange: lower ..< upper)
}
951
982
952
983
// Copy (i.e. transcode to UTF-16) our contents into a buffer. `alignedRange`
953
984
// means that the indices are part of the UTF16View.indices -- they are either
@@ -962,16 +993,16 @@ extension String.UTF16View {
962
993
range. lowerBound == _utf16AlignNativeIndex ( range. lowerBound) )
963
994
_internalInvariant (
964
995
range. upperBound == _utf16AlignNativeIndex ( range. upperBound) )
965
-
996
+
966
997
if _slowPath ( range. isEmpty) { return }
967
-
998
+
968
999
let isASCII = _guts. isASCII
969
1000
return unsafe _guts. withFastUTF8 { utf8 in
970
1001
var writeIdx = 0
971
1002
let writeEnd = buffer. count
972
1003
var readIdx = range. lowerBound. _encodedOffset
973
1004
let readEnd = range. upperBound. _encodedOffset
974
-
1005
+
975
1006
if isASCII {
976
1007
_internalInvariant ( range. lowerBound. transcodedOffset == 0 )
977
1008
_internalInvariant ( range. upperBound. transcodedOffset == 0 )
@@ -984,7 +1015,7 @@ extension String.UTF16View {
984
1015
}
985
1016
return
986
1017
}
987
-
1018
+
988
1019
// Handle mid-transcoded-scalar initial index
989
1020
if _slowPath( range. lowerBound. transcodedOffset != 0 ) {
990
1021
_internalInvariant ( range. lowerBound. transcodedOffset == 1 )
@@ -995,7 +1026,7 @@ extension String.UTF16View {
995
1026
readIdx &+= len
996
1027
writeIdx &+= 1
997
1028
}
998
-
1029
+
999
1030
// Transcode middle
1000
1031
while readIdx < readEnd {
1001
1032
let ( scalar, len) = unsafe _decodeScalar( utf8, startingAt: readIdx)
@@ -1009,13 +1040,13 @@ extension String.UTF16View {
1009
1040
writeIdx &+= 1
1010
1041
}
1011
1042
}
1012
-
1043
+
1013
1044
// Handle mid-transcoded-scalar final index
1014
1045
if _slowPath ( range. upperBound. transcodedOffset == 1 ) {
1015
1046
_internalInvariant ( writeIdx < writeEnd)
1016
1047
let ( scalar, _) = unsafe _decodeScalar( utf8, startingAt: readIdx)
1017
1048
_internalInvariant ( scalar. utf16. count == 2 )
1018
-
1049
+
1019
1050
// Note: this is intentionally not using the _unchecked subscript.
1020
1051
// (We rely on debug assertions to catch out of bounds access.)
1021
1052
unsafe buffer[ writeIdx] = scalar. utf16 [ 0 ]
0 commit comments