@@ -311,55 +311,36 @@ func _isSwiftStdlib_5_7() -> Bool {
311
311
312
312
// Encoding
313
313
extension _StringGuts {
314
- /// Returns whether this string is known to use UTF-16 code units .
314
+ /// Returns whether this string has a UTF-8 storage representation.
315
315
///
316
- /// This always returns a value corresponding to the string's actual encoding
317
- /// on stdlib versions >=5.7.
316
+ /// This always returns a value corresponding to the string's actual encoding.
317
+ @_alwaysEmitIntoClient
318
+ @inline ( __always)
319
+ internal var isUTF8 : Bool { _object. isUTF8 }
320
+
321
+ /// Returns whether this string has a UTF-16 storage representation.
318
322
///
319
- /// Standard Library versions <=5.6 did not set the corresponding flag, so
320
- /// this property always returns false.
323
+ /// This always returns a value corresponding to the string's actual encoding.
321
324
@_alwaysEmitIntoClient
322
325
@inline ( __always)
323
- internal var isKnownUTF16 : Bool { _object. isKnownUTF16 }
326
+ internal var isUTF16 : Bool { _object. isUTF16 }
324
327
325
328
@_alwaysEmitIntoClient // Swift 5.7
326
329
internal func markEncoding( _ i: String . Index ) -> String . Index {
327
- // In this inlinable function, we cannot assume that all foreign strings are
328
- // UTF-16 encoded, as this code may run on a future stdlib that may have
329
- // introduced other foreign forms.
330
- if #available( macOS 9999 , iOS 9999 , watchOS 9999 , tvOS 9999 , * ) { // SwiftStdlib 5.7
331
- // With a >=5.7 stdlib, we can rely on `isKnownUTF16` to contain the truth.
332
- return isKnownUTF16 ? i. _knownUTF16 : i. _knownUTF8
333
- }
334
- // We know that in stdlibs 5.0..<5.7, all foreign strings were UTF-16,
335
- // so we can use `isForeign` to determine the encoding.
336
- return isForeign ? i. _knownUTF16 : i. _knownUTF8
337
- }
338
-
339
- @inline ( __always)
340
- internal func internalMarkEncoding( _ i: String . Index ) -> String . Index {
341
- // This code is behind a resiliance boundary, so it always runs on a >=5.7
342
- // stdlib. Note though that it doesn't match the 5.7+ case in the inlinable
343
- // version above!
344
- //
345
- // We know that in this version of the stdlib, foreign strings happen to
346
- // always be UTF-16 encoded (like they were between 5.0 and 5.6), and
347
- // looking at `isForeign` instead of `isKnownUTF16` may allow the stdlib's
348
- // internal code to be better optimized -- so let's do that.
349
- isForeign ? i. _knownUTF16 : i. _knownUTF8
330
+ isUTF8 ? i. _knownUTF8 : i. _knownUTF16
350
331
}
351
332
352
333
/// Returns true if the encoding of the given index isn't known to be in
353
334
/// conflict with this string's encoding.
354
335
///
355
- /// If the index or the string was created by code that was built on stdlibs
356
- /// below 5.7, then this check may incorrectly return true on a mismatching
357
- /// index, but it is guaranteed to never incorrectly return false. If all
358
- /// loaded binaries were built in 5.7+, then this method is guaranteed to
359
- /// always return the correct value.
360
- @_alwaysEmitIntoClient
336
+ /// If the index was created by code that was built on a stdlib below 5.7,
337
+ /// then this check may incorrectly return true on a mismatching index, but it
338
+ /// is guaranteed to never incorrectly return false. If all loaded binaries
339
+ /// were built in 5.7+, then this method is guaranteed to always return the
340
+ /// correct value.
341
+ @_alwaysEmitIntoClient @ inline ( __always )
361
342
internal func hasMatchingEncoding( _ i: String . Index ) -> Bool {
362
- ( isForeign && i. _canBeUTF16 ) || ( !isForeign && i . _canBeUTF8 )
343
+ isUTF8 ? i. _canBeUTF8 : i . _canBeUTF16
363
344
}
364
345
365
346
/// Return an index whose encoding can be assumed to match that of `self`.
@@ -371,22 +352,20 @@ extension _StringGuts {
371
352
@_alwaysEmitIntoClient
372
353
@inline ( __always)
373
354
internal func ensureMatchingEncoding( _ i: String . Index ) -> String . Index {
374
- if _fastPath ( !isForeign && i . _canBeUTF8 ) { return i }
355
+ if _fastPath ( hasMatchingEncoding ( i ) ) { return i }
375
356
return _slowEnsureMatchingEncoding ( i)
376
357
}
377
358
378
359
@_alwaysEmitIntoClient
379
360
@inline ( never)
380
361
internal func _slowEnsureMatchingEncoding( _ i: String . Index ) -> String . Index {
381
- _internalInvariant ( isForeign || !i. _canBeUTF8)
382
- if isForeign {
383
- // Opportunistically detect attempts to use an UTF-8 index on a UTF-16
384
- // string. Strings don't usually get converted to UTF-16 storage, so it
385
- // seems okay to trap in this case -- the index most likely comes from an
386
- // unrelated string. (Trapping here may still turn out to affect binary
387
- // compatibility with broken code in existing binaries running with new
388
- // stdlibs. If so, we can replace this with the same transcoding hack as
389
- // in the UTF-16->8 case below.)
362
+ guard isUTF8 else {
363
+ // Attempt to use a UTF-8 index on a UTF-16 string. Strings don't usually
364
+ // get converted to UTF-16 storage, so it seems okay to trap in this case
365
+ // -- the index most likely comes from an unrelated string. (Trapping here
366
+ // may still turn out to affect binary compatibility with broken code in
367
+ // existing binaries running with new stdlibs. If so, we can replace this
368
+ // with the same transcoding hack as in the UTF-16->8 case below.)
390
369
//
391
370
// Note that this trap is not guaranteed to trigger when the process
392
371
// includes client binaries compiled with a previous Swift release.
@@ -397,13 +376,9 @@ extension _StringGuts {
397
376
//
398
377
// This trap can never trigger on OSes that have stdlibs <= 5.6, because
399
378
// those versions never set the `isKnownUTF16` flag in `_StringObject`.
400
- //
401
- _precondition ( !isKnownUTF16 || i. _canBeUTF16,
402
- " Invalid string index " )
403
- return i
379
+ _preconditionFailure ( " Invalid string index " )
404
380
}
405
- // If we get here, then we know for sure that this is an attempt to use an
406
- // UTF-16 index on a UTF-8 string.
381
+ // Attempt to use a UTF-16 index on a UTF-8 string.
407
382
//
408
383
// This can happen if `self` was originally verbatim-bridged, and someone
409
384
// mistakenly attempts to keep using an old index after a mutation. This is
0 commit comments