Skip to content

Commit bed6408

Browse files
milseman and amartini51 authored
Better docs for UTF8Span (#83184)
Co-authored-by: Alex Martini <[email protected]>
1 parent 44a6811 commit bed6408

File tree

5 files changed

+64
-14
lines changed

5 files changed

+64
-14
lines changed

stdlib/public/core/UTF8EncodingError.swift

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,6 @@ extension Unicode.UTF8 {
7070
errors (including overlong encodings, surrogates, and invalid code
7171
points), it will produce an error per byte.
7272

73-
// FIXME: without a checkAllErrors, we don't have these classification distinctions, should we drop it, ensure we will do it, or what?
74-
7573
Since overlong encodings, surrogates, and invalid code points are erroneous
7674
by the second byte (at the latest), the above definition produces the same
7775
ranges as defining such a sequence as a truncated scalar error followed by

stdlib/public/core/UTF8Span.swift

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// TODO: comment header
22

33

4-
/// TODO: docs
4+
/// A borrowed view into contiguous memory that contains validly-encoded UTF-8 code units.
55
@frozen
66
@safe
77
@available(SwiftStdlib 6.2, *)
@@ -71,6 +71,8 @@ extension UTF8Span {
7171
/// valid UTF-8, otherwise throws an error.
7272
///
7373
/// The resulting UTF8Span has the same lifetime constraints as `codeUnits`.
74+
///
75+
/// - Complexity: O(n)
7476
@lifetime(copy codeUnits)
7577
public init(
7678
validating codeUnits: consuming Span<UInt8>
@@ -174,10 +176,16 @@ extension UTF8Span {
174176

175177
@available(SwiftStdlib 6.2, *)
176178
extension UTF8Span {
179+
/// A Boolean value that indicates whether the UTF-8 span is empty.
180+
///
181+
/// - Complexity: O(1)
177182
public var isEmpty: Bool {
178183
self.count == 0
179184
}
180185

186+
/// A span used to access the code units.
187+
///
188+
/// - Complexity: O(1)
181189
public var span: Span<UInt8> {
182190
@lifetime(copy self)
183191
get {
@@ -190,7 +198,11 @@ extension UTF8Span {
190198
}
191199

192200
extension String {
193-
201+
/// Creates a new string, copying the specified code units.
202+
///
203+
/// This initializer skips UTF-8 validation because `codeUnits` must contain valid UTF-8.
204+
///
205+
/// - Complexity: O(n)
194206
@available(SwiftStdlib 6.2, *)
195207
public init(copying codeUnits: UTF8Span) {
196208
let isASCII = codeUnits.isKnownASCII

stdlib/public/core/UTF8SpanBits.swift

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ extension UTF8Span {
1212
/// contained non-ASCII content would report false for `isKnownASCII`, even
1313
/// if that String had subsequent mutation operations that removed any
1414
/// non-ASCII content.
15+
///
16+
/// - Complexity: O(1)
1517
@_alwaysEmitIntoClient
1618
public var isKnownASCII: Bool {
1719
0 != _countAndFlags & Self._asciiBit
@@ -20,6 +22,8 @@ extension UTF8Span {
2022
/// Scans the contents to check whether they are all ASCII.
2123
///
2224
/// Updates the `isKnownASCII` bit if contents are all-ASCII.
25+
///
26+
/// - Complexity: O(n)
2327
@lifetime(self: copy self)
2428
public mutating func checkForASCII() -> Bool {
2529
if isKnownASCII { return true }
@@ -35,7 +39,8 @@ extension UTF8Span {
3539

3640
/// Returns whether the contents are known to be NFC. This is not
3741
/// always checked at initialization time and is set by `checkForNFC`.
38-
// TODO: should this be @_unavailableInEmbedded
42+
///
43+
/// - Complexity: O(1)
3944
@_alwaysEmitIntoClient
4045
public var isKnownNFC: Bool {
4146
0 != _countAndFlags & Self._nfcBit
@@ -64,6 +69,8 @@ extension UTF8Span {
6469
/// algorithm. However, it cannot detect all NFC contents.
6570
///
6671
/// Updates the `isKnownNFC` bit.
72+
///
73+
/// - Complexity: O(n)
6774
@_unavailableInEmbedded
6875
@lifetime(self: copy self)
6976
public mutating func checkForNFC(
@@ -117,6 +124,9 @@ extension UTF8Span {
117124
0xFF00_0000_0000_0000
118125
}
119126

127+
/// The number of UTF-8 code units in the span.
128+
///
129+
/// - Complexity: O(1)
120130
@_alwaysEmitIntoClient
121131
public var count: Int {
122132
Int(truncatingIfNeeded: _countAndFlags & Self._countMask)

stdlib/public/core/UTF8SpanComparisons.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,16 @@
44
@available(SwiftStdlib 6.2, *)
55
extension UTF8Span {
66
/// Whether this span has the same bytes as `other`.
7+
///
8+
/// - Complexity: O(n)
79
@_alwaysEmitIntoClient
810
public func bytesEqual(to other: some Sequence<UInt8>) -> Bool {
911
unsafe _withUnsafeBufferPointer { unsafe $0.elementsEqual(other) }
1012
}
1113

1214
/// Whether this span has the same `Unicode.Scalar`s as `other`.
15+
///
16+
/// - Complexity: O(n)
1317
@_alwaysEmitIntoClient
1418
public func unicodeScalarsEqual(
1519
to other: some Sequence<Unicode.Scalar>
@@ -31,6 +35,8 @@ extension UTF8Span {
3135
}
3236

3337
/// Whether this span has the same `Character`s as `other`.
38+
///
39+
/// - Complexity: O(n)
3440
@_unavailableInEmbedded
3541
@_alwaysEmitIntoClient
3642
public func charactersEqual(
@@ -54,6 +60,8 @@ extension UTF8Span {
5460
extension UTF8Span {
5561
/// Whether `self` is equivalent to `other` under Unicode Canonical
5662
/// Equivalence.
63+
///
64+
/// - Complexity: O(n)
5765
public func isCanonicallyEquivalent(
5866
to other: UTF8Span
5967
) -> Bool {
@@ -70,6 +78,8 @@ extension UTF8Span {
7078

7179
/// Whether `self` orders less than `other` under Unicode Canonical
7280
/// Equivalence using normalized code-unit order (in NFC).
81+
///
82+
/// - Complexity: O(n)
7383
public func isCanonicallyLessThan(
7484
_ other: UTF8Span
7585
) -> Bool {

stdlib/public/core/UTF8SpanIterators.swift

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ extension UTF8Span {
99
.init(self)
1010
}
1111

12+
// **TODO**: Add examples to the doc comment below
13+
1214
/// Iterates over the `Unicode.Scalar` contents of a `UTF8Span`.
13-
///
14-
/// **TODO**: Examples
1515
@frozen
1616
public struct UnicodeScalarIterator: ~Escapable {
1717
public let codeUnits: UTF8Span
@@ -37,6 +37,8 @@ extension UTF8Span {
3737
/// of the next scalar.
3838
///
3939
/// Returns `nil` if at the end of the `UTF8Span`.
40+
///
41+
/// - Complexity: O(1)
4042
@lifetime(self: copy self)
4143
public mutating func next() -> Unicode.Scalar? {
4244
guard currentCodeUnitOffset < codeUnits.count else {
@@ -55,6 +57,8 @@ extension UTF8Span {
5557
/// previous scalar.
5658
///
5759
/// Returns `nil` if at the start of the `UTF8Span`.
60+
///
61+
/// - Complexity: O(1)
5862
@lifetime(self: copy self)
5963
public mutating func previous() -> Unicode.Scalar? {
6064
guard currentCodeUnitOffset > 0 else {
@@ -73,6 +77,8 @@ extension UTF8Span {
7377
///
7478
/// Returns the number of `Unicode.Scalar`s skipped over, which can be 0
7579
/// if at the end of the UTF8Span.
80+
///
81+
/// - Complexity: O(1)
7682
@lifetime(self: copy self)
7783
public mutating func skipForward() -> Int {
7884
guard currentCodeUnitOffset < codeUnits.count else {
@@ -90,6 +96,8 @@ extension UTF8Span {
9096
///
9197
/// Returns the number of `Unicode.Scalar`s skipped over, which can be
9298
/// fewer than `n` if at the end of the UTF8Span.
99+
///
100+
/// - Complexity: O(n)
93101
@lifetime(self: copy self)
94102
public mutating func skipForward(by n: Int) -> Int {
95103
var numSkipped = 0
@@ -105,6 +113,8 @@ extension UTF8Span {
105113
///
106114
/// Returns the number of `Unicode.Scalar`s skipped over, which can be 0
107115
/// if at the start of the UTF8Span.
116+
///
117+
/// - Complexity: O(1)
108118
@lifetime(self: copy self)
109119
public mutating func skipBack() -> Int {
110120
guard currentCodeUnitOffset > 0 else {
@@ -122,6 +132,8 @@ extension UTF8Span {
122132
///
123133
/// Returns the number of `Unicode.Scalar`s skipped over, which can be
124134
/// fewer than `n` if at the start of the UTF8Span.
135+
///
136+
/// - Complexity: O(n)
125137
@lifetime(self: copy self)
126138
public mutating func skipBack(by n: Int) -> Int {
127139
var numSkipped = 0
@@ -132,35 +144,39 @@ extension UTF8Span {
132144
return numSkipped
133145
}
134146

147+
// TODO: Example for reset docs
148+
135149
/// Reset to the nearest scalar-aligned code unit offset `<= i`.
136150
///
137-
/// **TODO**: Example
151+
/// - Complexity: O(1)
138152
@lifetime(self: copy self)
139153
public mutating func reset(roundingBackwardsFrom i: Int) {
140154
self.currentCodeUnitOffset = codeUnits._scalarAlignBackwards(i)
141155
}
142156

143157
/// Reset to the nearest scalar-aligned code unit offset `>= i`.
144158
///
145-
/// **TODO**: Example
159+
/// - Complexity: O(1)
146160
@lifetime(self: copy self)
147161
public mutating func reset(roundingForwardsFrom i: Int) {
148162
self.currentCodeUnitOffset = codeUnits._scalarAlignForwards(i)
149163
}
150164

165+
// TODO: for below, verify that there is no path to UB, just garbage-data or guaranteed
166+
// trap!
167+
151168
/// Reset this iterator to `codeUnitOffset`, skipping _all_ safety
152169
/// checks (including bounds checks).
153170
///
154171
/// Note: This is only for very specific, low-level use cases. If
155172
/// `codeUnitOffset` is not properly scalar-aligned, this function can
156173
/// result in undefined behavior when, e.g., `next()` is called.
157174
///
158-
/// TODO: verify that we're not UB, just garabage-data or guaranteed
159-
/// trap!
160-
///
161175
/// For example, this could be used by a regex engine to backtrack to a
162176
/// known-valid previous position.
163177
///
178+
///
179+
/// - Complexity: O(1)
164180
@unsafe
165181
@lifetime(self: copy self)
166182
public mutating func reset(toUnchecked codeUnitOffset: Int) {
@@ -172,6 +188,8 @@ extension UTF8Span {
172188
/// current position.
173189
///
174190
/// The resultant `UTF8Span` has the same lifetime constraints as `self`.
191+
///
192+
/// - Complexity: O(1)
175193
@lifetime(copy self)
176194
public func prefix() -> UTF8Span {
177195
let slice = codeUnits.span.extracting(0..<currentCodeUnitOffset)
@@ -185,6 +203,8 @@ extension UTF8Span {
185203
/// current position.
186204
///
187205
/// The resultant `UTF8Span` has the same lifetime constraints as `self`.
206+
///
207+
/// - Complexity: O(1)
188208
@lifetime(copy self)
189209
public func suffix() -> UTF8Span {
190210
let slice = codeUnits.span.extracting(currentCodeUnitOffset..<codeUnits.count)
@@ -208,9 +228,9 @@ extension UTF8Span {
208228
.init(self)
209229
}
210230

231+
// **TODO**: Add examples to the doc comment below
232+
211233
/// Iterates over the `Character` contents of a `UTF8Span`.
212-
///
213-
/// **TODO**: Examples
214234
public struct CharacterIterator: ~Escapable {
215235
public let codeUnits: UTF8Span
216236

0 commit comments

Comments
 (0)