Skip to content

Commit cf73dd9

Browse files
committed
Merge pull request #2642 from natecook1000/nc-revise-strings
[stdlib] Revise documentation for string-related types & protocols
2 parents 467ae9f + dee12dc commit cf73dd9

37 files changed

+3019
-691
lines changed

stdlib/public/core/Arrays.swift.gyb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -482,8 +482,8 @@ public struct ${Self}<Element>
482482
%end
483483
}
484484

485-
/// The array's "past the end" position, or one greater than the last valid
486-
/// subscript argument.
485+
/// The array's "past the end" position---that is, the position one greater
486+
/// than the last valid subscript argument.
487487
///
488488
/// When you need a range that includes the last element of an array, use the
489489
/// half-open range operator (`..<`) with `endIndex`. The `..<` operator
@@ -934,7 +934,7 @@ extension ${Self} : ArrayLiteralConvertible {
934934
// Optimized implementation for Array
935935
/// Creates an array from the given array literal.
936936
///
937-
/// Don't directly call this initializer, which is used by the compiler
937+
/// Do not call this initializer directly. It is used by the compiler
938938
/// when you use an array literal. Instead, create a new array by using an
939939
/// array literal as its value. To do this, enclose a comma-separated list of
940940
/// values in square brackets.
@@ -951,7 +951,7 @@ extension ${Self} : ArrayLiteralConvertible {
951951
%else:
952952
/// Creates an array from the given array literal.
953953
///
954-
/// Don't directly call this initializer, which is used by the compiler when
954+
/// Do not call this initializer directly. It is used by the compiler when
955955
/// you use an array literal. Instead, create a new array by using an array
956956
/// literal as its value. To do this, enclose a comma-separated list of
957957
/// values in square brackets.

stdlib/public/core/Bool.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ extension Bool : _BuiltinBooleanLiteralConvertible, BooleanLiteralConvertible {
7979

8080
/// Creates an instance initialized to the specified Boolean literal.
8181
///
82-
/// Don't directly call this initializer, which is used by the compiler when
82+
/// Do not call this initializer directly. It is used by the compiler when
8383
/// you use a Boolean literal. Instead, create a new `Bool` instance by
8484
/// using one of the Boolean literals `true` and `false`.
8585
///

stdlib/public/core/CString.swift

Lines changed: 92 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,64 @@ import SwiftShims
1616

1717
extension String {
1818

19-
/// Create a new `String` by copying the nul-terminated UTF-8 data
20-
/// referenced by a `cString`.
19+
/// Creates a new string by copying the null-terminated UTF-8 data referenced
20+
/// by the given pointer.
2121
///
22-
/// If `cString` contains ill-formed UTF-8 code unit sequences, replaces them
23-
/// with replacement characters (U+FFFD).
22+
/// If `cString` contains ill-formed UTF-8 code unit sequences, this
23+
/// initializer replaces them with the Unicode replacement character
24+
/// (`"\u{FFFD}"`).
2425
///
25-
/// - Precondition: `cString != nil`
26+
/// The following example calls this initializer with pointers to the
27+
/// contents of two different `CChar` arrays---the first with well-formed
28+
/// UTF-8 code unit sequences and the second with an ill-formed sequence at
29+
/// the end.
30+
///
31+
/// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
32+
/// validUTF8.withUnsafeBufferPointer { ptr in
33+
/// let s = String(cString: ptr.baseAddress!)
34+
/// print(s)
35+
/// }
36+
/// // Prints "Café"
37+
///
38+
/// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
39+
/// invalidUTF8.withUnsafeBufferPointer { ptr in
40+
/// let s = String(cString: ptr.baseAddress!)
41+
/// print(s)
42+
/// }
43+
/// // Prints "Caf�"
44+
///
45+
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
2646
public init(cString: UnsafePointer<CChar>) {
2747
self = String.decodeCString(UnsafePointer(cString), as: UTF8.self,
2848
repairingInvalidCodeUnits: true)!.result
2949
}
3050

31-
/// Create a new `String` by copying the nul-terminated UTF-8 data
32-
/// referenced by a `cString`.
51+
/// Creates a new string by copying and validating the null-terminated UTF-8
52+
/// data referenced by the given pointer.
53+
///
54+
/// This initializer does not try to repair ill-formed UTF-8 code unit
55+
/// sequences. If any are found, the result of the initializer is `nil`.
56+
///
57+
/// The following example calls this initializer with pointers to the
58+
/// contents of two different `CChar` arrays---the first with well-formed
59+
/// UTF-8 code unit sequences and the second with an ill-formed sequence at
60+
/// the end.
61+
///
62+
/// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0]
63+
/// validUTF8.withUnsafeBufferPointer { ptr in
64+
/// let s = String(validatingUTF8: ptr.baseAddress!)
65+
/// print(s)
66+
/// }
67+
/// // Prints "Optional(Café)"
3368
///
34-
/// Does not try to repair ill-formed UTF-8 code unit sequences, fails if any
35-
/// such sequences are found.
69+
/// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0]
70+
/// invalidUTF8.withUnsafeBufferPointer { ptr in
71+
/// let s = String(validatingUTF8: ptr.baseAddress!)
72+
/// print(s)
73+
/// }
74+
/// // Prints "nil"
3675
///
37-
/// - Precondition: `cString != nil`
76+
/// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence.
3877
public init?(validatingUTF8 cString: UnsafePointer<CChar>) {
3978
guard let (result, _) = String.decodeCString(
4079
UnsafePointer(cString),
@@ -45,12 +84,50 @@ extension String {
4584
self = result
4685
}
4786

48-
/// Create a new `String` by copying the nul-terminated data
49-
/// referenced by a `cString` using `encoding`.
87+
/// Creates a new string by copying the null-terminated data referenced by
88+
/// the given pointer using the specified encoding.
89+
///
90+
/// When you pass `true` as `isRepairing`, this method replaces ill-formed
91+
/// sequences with the Unicode replacement character (`"\u{FFFD}"`);
92+
/// otherwise, an ill-formed sequence causes this method to stop decoding
93+
/// and return `nil`.
94+
///
95+
/// The following example calls this method with pointers to the contents of
96+
/// two different `UInt8` arrays---the first with well-formed UTF-8 code
97+
/// unit sequences and the second with an ill-formed sequence at the end.
98+
///
99+
/// let validUTF8: [UInt8] = [67, 97, 102, 195, 169, 0]
100+
/// validUTF8.withUnsafeBufferPointer { ptr in
101+
/// let s = String.decodeCString(ptr.baseAddress,
102+
/// as: UTF8.self,
103+
/// repairingInvalidCodeUnits: true)
104+
/// print(s)
105+
/// }
106+
/// // Prints "Optional((Café, false))"
107+
///
108+
/// let invalidUTF8: [UInt8] = [67, 97, 102, 195, 0]
109+
/// invalidUTF8.withUnsafeBufferPointer { ptr in
110+
/// let s = String.decodeCString(ptr.baseAddress,
111+
/// as: UTF8.self,
112+
/// repairingInvalidCodeUnits: true)
113+
/// print(s)
114+
/// }
115+
/// // Prints "Optional((Caf�, true))"
116+
///
117+
/// - Parameters:
118+
/// - cString: A pointer to a null-terminated code sequence encoded in
119+
/// `encoding`.
120+
/// - encoding: The Unicode encoding of the data referenced by `cString`.
121+
/// - isRepairing: Pass `true` to create a new string, even when the data
122+
/// referenced by `cString` contains ill-formed sequences. Ill-formed
123+
/// sequences are replaced with the Unicode replacement character
124+
/// (`"\u{FFFD}"`). Pass `false` to interrupt the creation of the new
125+
/// string if an ill-formed sequence is detected.
126+
/// - Returns: A tuple with the new string and a Boolean value that indicates
127+
/// whether any repairs were made. If `isRepairing` is `false` and an
128+
/// ill-formed sequence is detected, this method returns `nil`.
50129
///
51-
/// Returns `nil` if the `cString` is `nil` or if it contains ill-formed code
52-
/// units and no repairing has been requested. Otherwise replaces
53-
/// ill-formed code units with replacement characters (U+FFFD).
130+
/// - SeeAlso: `UnicodeCodec`
54131
public static func decodeCString<Encoding : UnicodeCodec>(
55132
_ cString: UnsafePointer<Encoding.CodeUnit>?,
56133
as encoding: Encoding.Type,

stdlib/public/core/Character.swift

Lines changed: 91 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,56 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
/// `Character` represents some Unicode grapheme cluster as
14-
/// defined by a canonical, localized, or otherwise tailored
15-
/// segmentation algorithm.
13+
/// A single extended grapheme cluster, which approximates a user-perceived
14+
/// character.
15+
///
16+
/// The `Character` type represents a character made up of one or more Unicode
17+
/// scalar values, grouped by a Unicode boundary algorithm. Generally, a
18+
/// `Character` instance matches what the reader of a string will perceive as
19+
/// a single character. The number of visible characters is generally the most
20+
/// natural way to count the length of a string.
21+
///
22+
/// let greeting = "Hello! 🐥"
23+
/// print("Character count: \(greeting.characters.count)")
24+
/// // Prints "Character count: 8"
25+
///
26+
/// Because each character in a string can be made up of one or more Unicode
27+
/// code points, the number of characters in a string may not match the length
28+
/// of the Unicode code point representation or the length of the string in a
29+
/// particular binary representation.
30+
///
31+
/// print("Unicode code point count: \(greeting.unicodeScalars.count)")
32+
/// // Prints "Unicode code point count: 8"
33+
///
34+
/// print("UTF-8 representation count: \(greeting.utf8.count)")
35+
/// // Prints "UTF-8 representation count: 11"
36+
///
37+
/// Every `Character` instance is composed of one or more Unicode code points
38+
/// that are grouped together as an *extended grapheme cluster*. The way these
39+
/// code points are grouped is defined by a canonical, localized, or otherwise
40+
/// tailored Unicode segmentation algorithm.
41+
///
42+
/// For example, a country's Unicode flag character is made up of two regional
43+
/// indicator code points that correspond to that country's ISO 3166-1 alpha-2
44+
/// code. The alpha-2 code for the United States is "US", so its flag
45+
/// character is made up of the Unicode code points `"\u{1F1FA}"` (REGIONAL
46+
/// INDICATOR SYMBOL LETTER U) and `"\u{1F1F8}"` (REGIONAL INDICATOR SYMBOL
47+
/// LETTER S). When placed next to each other in a Swift string literal, these
48+
/// two code points are combined into a single grapheme cluster, represented
49+
/// by a `Character` instance in Swift.
50+
///
51+
/// let usFlag: Character = "\u{1F1FA}\u{1F1F8}"
52+
/// print(usFlag)
53+
/// // Prints "🇺🇸"
54+
///
55+
/// For more information about the Unicode terms used in this discussion, see
56+
/// the [Unicode.org glossary][glossary]. In particular, this discussion
57+
/// mentions [extended grapheme clusters][clusters] and [Unicode scalar
58+
/// values][scalars].
59+
///
60+
/// [glossary]: http://www.unicode.org/glossary/
61+
/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster
62+
/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value
1663
public struct Character :
1764
_BuiltinExtendedGraphemeClusterLiteralConvertible,
1865
ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable, Comparable {
@@ -33,7 +80,9 @@ public struct Character :
3380
case small(Builtin.Int63)
3481
}
3582

36-
/// Construct a `Character` containing just the given `scalar`.
83+
/// Creates a character containing the given Unicode scalar value.
84+
///
85+
/// - Parameter scalar: The Unicode scalar value to convert into a character.
3786
public init(_ scalar: UnicodeScalar) {
3887
var asInt: UInt64 = 0
3988
var shift: UInt64 = 0
@@ -55,7 +104,17 @@ public struct Character :
55104
UTF32.self, input: CollectionOfOne(UInt32(value))))
56105
}
57106

58-
/// Create an instance initialized to `value`.
107+
/// Creates a character with the specified value.
108+
///
109+
/// Do not call this initializer directly. It is used by the compiler when you
110+
/// use a string literal to initialize a `Character` instance. For example:
111+
///
112+
/// let snowflake: Character = "❄︎"
113+
/// print(snowflake)
114+
/// // Prints "❄︎"
115+
///
116+
/// The assignment to the `snowflake` constant calls this initializer behind
117+
/// the scenes.
59118
public init(unicodeScalarLiteral value: Character) {
60119
self = value
61120
}
@@ -73,14 +132,31 @@ public struct Character :
73132
isASCII: isASCII))
74133
}
75134

76-
/// Create an instance initialized to `value`.
135+
/// Creates a character with the specified value.
136+
///
137+
/// Do not call this initializer directly. It is used by the compiler when you
138+
/// use a string literal to initialize a `Character` instance. For example:
139+
///
140+
/// let oBreve: Character = "o\u{306}"
141+
/// print(oBreve)
142+
/// // Prints "ŏ"
143+
///
144+
/// The assignment to the `oBreve` constant calls this initializer behind the
145+
/// scenes.
77146
public init(extendedGraphemeClusterLiteral value: Character) {
78147
self = value
79148
}
80149

81-
/// Create an instance from a single-character `String`.
150+
/// Creates a character from a single-character string.
151+
///
152+
/// The following example creates a new character from the uppercase version
153+
/// of a string that only holds one character.
154+
///
155+
/// let a = "a"
156+
/// let capitalA = Character(a.uppercased())
82157
///
83-
/// - Precondition: `s` contains exactly one extended grapheme cluster.
158+
/// - Parameter s: The single-character string to convert to a `Character`
159+
/// instance. `s` must contain exactly one extended grapheme cluster.
84160
public init(_ s: String) {
85161
// The small representation can accept up to 8 code units as long
86162
// as the last one is a continuation. Since the high bit of the
@@ -258,13 +334,10 @@ public struct Character :
258334
var data: UInt64
259335
}
260336

261-
/// The hash value.
337+
/// The character's hash value.
262338
///
263-
/// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`.
264-
///
265-
/// - Note: The hash value is not guaranteed to be stable across
266-
/// different invocations of the same program. Do not persist the
267-
/// hash value across program runs.
339+
/// Hash values are not guaranteed to be equal across different executions of
340+
/// your program. Do not save hash values to use during a future execution.
268341
public var hashValue: Int {
269342
// FIXME(performance): constructing a temporary string is extremely
270343
// wasteful and inefficient.
@@ -281,14 +354,16 @@ public struct Character :
281354
}
282355

283356
extension Character : CustomDebugStringConvertible {
284-
/// A textual representation of `self`, suitable for debugging.
357+
/// A textual representation of the character, suitable for debugging.
285358
public var debugDescription: String {
286359
return String(self).debugDescription
287360
}
288361
}
289362

290363
extension String {
291-
/// Construct an instance containing just the given `Character`.
364+
/// Creates a string containing the given character.
365+
///
366+
/// - Parameter c: The character to convert to a string.
292367
public init(_ c: Character) {
293368
switch c._representation {
294369
case let .small(_63bits):

stdlib/public/core/ClosedRange.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,8 @@ public struct CountableClosedRange<
212212
return ClosedRangeIndex(lowerBound)
213213
}
214214

215-
/// The range's "past the end" position, or one greater than the last valid
216-
/// subscript argument.
215+
/// The range's "past the end" position---that is, the position one greater
216+
/// than the last valid subscript argument.
217217
public var endIndex: ClosedRangeIndex<Bound> {
218218
return ClosedRangeIndex()
219219
}

stdlib/public/core/Collection.swift

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ public protocol IndexableBase {
3838
/// If the collection is empty, `startIndex` is equal to `endIndex`.
3939
var startIndex: Index { get }
4040

41-
/// The collection's "past the end" position, or one greater than the last
42-
/// valid subscript argument.
41+
/// The collection's "past the end" position---that is, the position one
42+
/// greater than the last valid subscript argument.
4343
///
4444
/// When you need a range that includes the last element of a collection, use
4545
/// the half-open range operator (`..<`) with `endIndex`. The `..<` operator
@@ -157,8 +157,11 @@ public protocol IndexableBase {
157157
/// In most cases, it's best to ignore this protocol and use the `Collection`
158158
/// protocol instead, because it has a more complete interface.
159159
public protocol Indexable : IndexableBase {
160-
/// A type that can represent the number of steps between a pair of
161-
/// indices.
160+
/// A type used to represent the number of steps between two indices, where
161+
/// one value is reachable from the other.
162+
///
163+
/// In Swift, *reachability* refers to the ability to produce one value from
164+
/// the other through zero or more applications of `index(after:)`.
162165
associatedtype IndexDistance : SignedInteger = Int
163166

164167
/// Returns an index that is the specified distance from the given index.

stdlib/public/core/CollectionOfOne.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,11 @@ public struct CollectionOfOne<Element>
5050
return 0
5151
}
5252

53-
/// The "past the end" position; always identical to
54-
/// `index(after: startIndex)`.
53+
/// The "past the end" position---that is, the position one greater than the
54+
/// last valid subscript argument.
5555
///
56-
/// - Note: `endIndex` is not a valid argument to `subscript`.
56+
/// In a `CollectionOfOne` instance, `endIndex` is always identical to
57+
/// `index(after: startIndex)`.
5758
public var endIndex: Int {
5859
return 1
5960
}

0 commit comments

Comments
 (0)