Skip to content

Commit d8c809c

Browse files
authored
Merge pull request swiftlang#68419 from glessard/se0405-part1
[se-0405] Implement API additions
2 parents 6bfbaa1 + ac47533 commit d8c809c

File tree

6 files changed

+384
-2
lines changed

6 files changed

+384
-2
lines changed

stdlib/public/core/String.swift

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,116 @@ extension String {
493493
self = String._fromNonContiguousUnsafeBitcastUTF8Repairing(codeUnits).0
494494
}
495495

496+
/// Creates a new string by copying and validating the sequence of
/// code units passed in, according to the specified encoding.
///
/// This initializer does not try to repair ill-formed code unit sequences.
/// If any are found, the result of the initializer is `nil`.
///
/// The following example calls this initializer with the contents of two
/// different arrays---first with a well-formed UTF-8 code unit sequence and
/// then with an ill-formed UTF-16 code unit sequence.
///
///     let validUTF8: [UInt8] = [67, 97, 102, 195, 169]
///     let valid = String(validating: validUTF8, as: UTF8.self)
///     print(valid ?? "nil")
///     // Prints "Café"
///
///     let invalidUTF16: [UInt16] = [0x41, 0x42, 0xd801]
///     let invalid = String(validating: invalidUTF16, as: UTF16.self)
///     print(invalid ?? "nil")
///     // Prints "nil"
///
/// - Parameters:
///   - codeUnits: A sequence of code units that encode a `String`.
///   - encoding: A conformer to `Unicode.Encoding` to be used
///     to decode `codeUnits`.
@inlinable
@available(SwiftStdlib 5.11, *)
public init?<Encoding: Unicode.Encoding>(
  validating codeUnits: some Sequence<Encoding.CodeUnit>,
  as encoding: Encoding.Type
) {
  // Fast path: when the sequence exposes contiguous storage, validate the
  // buffer directly. The outer optional is `nil` when no contiguous storage
  // is available; the inner optional is `nil` when validation failed.
  let contiguousResult = codeUnits.withContiguousStorageIfAvailable {
    String._validate($0, as: Encoding.self)
  }
  if let validationResult = contiguousResult {
    guard let validatedString = validationResult else {
      return nil
    }
    self = validatedString
    return
  }

  // slow-path: transcode to UTF-8 one code unit at a time, stopping at the
  // first ill-formed sequence.
  var transcoded: [UTF8.CodeUnit] = []
  transcoded.reserveCapacity(codeUnits.underestimatedCount)
  var isASCII = true
  let error = transcode(
    codeUnits.makeIterator(),
    from: Encoding.self,
    to: UTF8.self,
    stoppingOnError: true,
    into: { uint8 in
      transcoded.append(uint8)
      // Any UTF-8 code unit with its high bit set means the result is not
      // pure ASCII.
      if isASCII && (uint8 & 0x80) == 0x80 { isASCII = false }
    }
  )
  if error { return nil }
  // `transcoded` only holds the output of a successful transcoding to UTF-8,
  // so it is passed on without a second validation.
  self = transcoded.withUnsafeBufferPointer {
    String._uncheckedFromUTF8($0, asciiPreScanResult: isASCII)
  }
}
557+
558+
/// Creates a new string by copying and validating the sequence of
/// code units passed in, according to the specified encoding.
///
/// This initializer does not try to repair ill-formed code unit sequences.
/// If any are found, the result of the initializer is `nil`.
///
/// The following example calls this initializer with the contents of two
/// different arrays---first with a well-formed UTF-8 code unit sequence and
/// then with an ill-formed ASCII code unit sequence.
///
///     let validUTF8: [Int8] = [67, 97, 102, -61, -87]
///     let valid = String(validating: validUTF8, as: UTF8.self)
///     print(valid ?? "nil")
///     // Prints "Café"
///
///     let invalidASCII: [Int8] = [67, 97, -5]
///     let invalid = String(validating: invalidASCII, as: Unicode.ASCII.self)
///     print(invalid ?? "nil")
///     // Prints "nil"
///
/// - Parameters:
///   - codeUnits: A sequence of code units that encode a `String`.
///   - encoding: A conformer to `Unicode.Encoding` that can decode
///     `codeUnits` as `UInt8`.
@inlinable
@available(SwiftStdlib 5.11, *)
public init?<Encoding>(
  validating codeUnits: some Sequence<Int8>,
  as encoding: Encoding.Type
) where Encoding: Unicode.Encoding, Encoding.CodeUnit == UInt8 {
  // Fast path: view the contiguous `Int8` storage as `UInt8` and validate it
  // in place. The outer optional is `nil` when no contiguous storage is
  // available; the inner optional is `nil` when validation failed.
  let contiguousResult = codeUnits.withContiguousStorageIfAvailable {
    $0.withMemoryRebound(to: UInt8.self) {
      String._validate($0, as: Encoding.self)
    }
  }
  if let validationResult = contiguousResult {
    guard let validatedString = validationResult else {
      return nil
    }
    self = validatedString
    return
  }

  // slow-path: reinterpret each element's bit pattern as `UInt8` lazily and
  // defer to the initializer that takes `Encoding.CodeUnit` elements.
  let uint8s = codeUnits.lazy.map(UInt8.init(bitPattern:))
  self.init(validating: uint8s, as: Encoding.self)
}
605+
496606
/// Creates a new string with the specified capacity in UTF-8 code units, and
497607
/// then calls the given closure with a buffer covering the string's
498608
/// uninitialized memory.

stdlib/public/core/StringCreate.swift

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -298,4 +298,80 @@ extension String {
298298
String._uncheckedFromUTF8($0)
299299
}
300300
}
301+
302+
/// Validates `input` as a sequence of `Encoding` code units and, when it is
/// well-formed, returns a `String` with the equivalent contents.
///
/// When the code unit type is `UInt8` and the encoding is UTF-8 or ASCII, the
/// bytes are checked with `validateUTF8` / `_allASCII` and handed to
/// `_uncheckedFromUTF8` / `_uncheckedFromASCII`. Any other encoding is parsed
/// scalar by scalar and re-encoded as UTF-8 into a manually allocated buffer,
/// which is passed to `__SharedStringStorage(_mortal:countAndFlags:)` on
/// success and deallocated here on failure.
///
/// - Parameters:
///   - input: The code units to validate.
///   - encoding: The encoding used to interpret `input`.
/// - Returns: The validated string, or `nil` if `input` contains an
///   ill-formed sequence or a scalar that cannot be encoded as UTF-8.
@usableFromInline
@available(SwiftStdlib 5.11, *)
internal static func _validate<Encoding: Unicode.Encoding>(
  _ input: UnsafeBufferPointer<Encoding.CodeUnit>,
  as encoding: Encoding.Type
) -> String? {
  if encoding.CodeUnit.self == UInt8.self {
    let rawBytes = _identityCast(input, to: UnsafeBufferPointer<UInt8>.self)
    if encoding.self == UTF8.self {
      if case .success(let extraInfo) = validateUTF8(rawBytes) {
        return String._uncheckedFromUTF8(
          rawBytes, asciiPreScanResult: extraInfo.isASCII
        )
      }
      return nil
    } else if encoding.self == Unicode.ASCII.self {
      return _allASCII(rawBytes) ? String._uncheckedFromASCII(rawBytes) : nil
    }
  }

  // slow-path: transcode into a buffer that starts at three bytes per input
  // code unit and grows by half its size whenever a scalar does not fit.
  var sawOnlyASCII = true
  var storageBuffer = UnsafeMutableBufferPointer<UInt8>.allocate(
    capacity: input.count*3
  )
  // Index one past the last initialized byte of `storageBuffer`.
  var end = storageBuffer.startIndex

  var scalarParser = Encoding.ForwardParser()
  var remaining = input.makeIterator()

  scalarLoop:
  while true {
    switch scalarParser.parseScalar(from: &remaining) {
    case .emptyInput:
      break scalarLoop
    case .valid(let parsed):
      guard let encoded = Unicode.UTF8.encode(Encoding.decode(parsed)) else {
        // transcoding error: share the cleanup in the `.error` case below
        fallthrough
      }
      if storageBuffer.count < end + encoded.count {
        let grown = UnsafeMutableBufferPointer<UInt8>.allocate(
          capacity: storageBuffer.count + (storageBuffer.count >> 1)
        )
        // Move the bytes written so far, then release the old allocation.
        end = grown.moveInitialize(fromContentsOf: storageBuffer[..<end])
        storageBuffer.deallocate()
        storageBuffer = grown
      }
      // A scalar that needs more than one UTF-8 code unit is not ASCII.
      if encoded.count > 1 { sawOnlyASCII = false }
      end = storageBuffer[end...].initialize(fromContentsOf: encoded)
    case .error:
      // validation error: clean up and return nil
      storageBuffer[..<end].deinitialize()
      storageBuffer.deallocate()
      return nil
    }
  }

  guard let base = storageBuffer.baseAddress else { return nil }
  // ASCII-only content is also flagged as NFC.
  let flags = _StringObject.CountAndFlags(
    count: storageBuffer.startIndex.distance(to: end),
    isASCII: sawOnlyASCII,
    isNFC: sawOnlyASCII,
    isNativelyStored: false,
    isTailAllocated: false
  )
  return __SharedStringStorage(_mortal: base, countAndFlags: flags).asString
}
301377
}

stdlib/public/core/StringStorage.swift

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -681,6 +681,8 @@ final internal class __SharedStringStorage
681681

682682
internal var _breadcrumbs: _StringBreadcrumbs? = nil
683683

684+
/// When `false` and `_owner` is `nil`, `deinit` deallocates `start`.
/// `init(_mortal:countAndFlags:)` sets this to `false`; the other visible
/// initializer sets it to `true`.
internal var immortal = false
685+
684686
internal var count: Int { _countAndFlags.count }
685687

686688
internal init(
@@ -689,6 +691,7 @@ final internal class __SharedStringStorage
689691
) {
690692
self._owner = nil
691693
self.start = ptr
694+
self.immortal = true
692695
#if _pointerBitWidth(_64)
693696
self._countAndFlags = countAndFlags
694697
#elseif _pointerBitWidth(_32)
@@ -709,6 +712,32 @@ final internal class __SharedStringStorage
709712
return String(_StringGuts(self))
710713
}
711714
}
715+
716+
/// Creates shared string storage over the bytes at `ptr`, with no owner
/// object and with `immortal` set to `false`, so that `deinit` deallocates
/// `start`.
///
/// - Parameters:
///   - ptr: The first byte of the string contents; it must be the start of
///     an allocation.
///   - countAndFlags: The count and flags to record for the stored contents.
internal init(
  _mortal ptr: UnsafePointer<UInt8>,
  countAndFlags: _StringObject.CountAndFlags
) {
  // ptr *must* be the start of an allocation
  immortal = false
  start = ptr
  _owner = nil
#if _pointerBitWidth(_64)
  _countAndFlags = countAndFlags
#elseif _pointerBitWidth(_32)
  _count = countAndFlags.count
  _countFlags = countAndFlags.flags
#else
#error("Unknown platform")
#endif
  super.init()
  _invariantCheck()
}
735+
736+
/// Deallocates the memory at `start` when this object has no `_owner` and is
/// not marked `immortal`; otherwise does nothing.
deinit {
  guard _owner == nil, !immortal else { return }
  start.deallocate()
}
712741
}
713742

714743
extension __SharedStringStorage {

test/abi/macOS/arm64/stdlib.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ Added: _$ss19_getWeakRetainCountySuyXlF
4545
// Swift._getUnownedRetainCount(Swift.AnyObject) -> Swift.UInt
4646
Added: _$ss22_getUnownedRetainCountySuyXlF
4747

48+
// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == B.Element>(validating: B, as: A.Type) -> Swift.String?
49+
Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_7ElementQy_8CodeUnitRtzr0_lufC
50+
51+
// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == Swift.UInt8, B.Element == Swift.Int8>(validating: B, as: A.Type) -> Swift.String?
52+
Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_s5UInt8V8CodeUnitRtzs4Int8V7ElementRt_r0_lufC
53+
54+
// static Swift.String._validate<A where A: Swift._UnicodeEncoding>(_: Swift.UnsafeBufferPointer<A.CodeUnit>, as: A.Type) -> Swift.String?
55+
Added: _$sSS9_validate_2asSSSgSRy8CodeUnitQzG_xmts16_UnicodeEncodingRzlFZ
56+
4857
// class __StaticArrayStorage
4958
Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfC
5059
Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfCTj

test/abi/macOS/x86_64/stdlib.swift

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ Added: _$ss19_getWeakRetainCountySuyXlF
4545
// Swift._getUnownedRetainCount(Swift.AnyObject) -> Swift.UInt
4646
Added: _$ss22_getUnownedRetainCountySuyXlF
4747

48+
// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == B.Element>(validating: B, as: A.Type) -> Swift.String?
49+
Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_7ElementQy_8CodeUnitRtzr0_lufC
50+
51+
// Swift.String.init<A, B where A: Swift._UnicodeEncoding, B: Swift.Sequence, A.CodeUnit == Swift.UInt8, B.Element == Swift.Int8>(validating: B, as: A.Type) -> Swift.String?
52+
Added: _$sSS10validating2asSSSgq__xmtcs16_UnicodeEncodingRzSTR_s5UInt8V8CodeUnitRtzs4Int8V7ElementRt_r0_lufC
53+
54+
// static Swift.String._validate<A where A: Swift._UnicodeEncoding>(_: Swift.UnsafeBufferPointer<A.CodeUnit>, as: A.Type) -> Swift.String?
55+
Added: _$sSS9_validate_2asSSSgSRy8CodeUnitQzG_xmts16_UnicodeEncodingRzlFZ
56+
4857
// class __StaticArrayStorage
4958
Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfC
5059
Added: _$ss20__StaticArrayStorageC12_doNotCallMeAByt_tcfCTj

0 commit comments

Comments
 (0)