Skip to content

Commit 4967fc0

Browse files
committed
[Unicode] Add convenience APIs to Unicode encodings
Add convenience APIs to the stdlib's Unicode encodings:
* Unicode.UTF16
  * isASCII
  * isSurrogate
* Unicode.UTF8
  * isASCII
  * width
* Unicode.UTF32
  * isASCII
* Unicode.ASCII
  * isASCII

Tests added.
1 parent b6a158e commit 4967fc0

13 files changed

+423
-352
lines changed

stdlib/public/core/ASCII.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ extension Unicode.ASCII : Unicode.Encoding {
2323
return EncodedScalar(0x1a) // U+001A SUBSTITUTE; best we can do for ASCII
2424
}
2525

26+
/// Returns a Boolean value indicating whether the given code unit represents an ASCII scalar.
27+
@_alwaysEmitIntoClient
28+
public static func isASCII(_ x: CodeUnit) -> Bool { return UTF8.isASCII(x) }
29+
2630
@inline(__always)
2731
@inlinable
2832
public static func _isScalar(_ x: CodeUnit) -> Bool {

stdlib/public/core/LegacyABI.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,23 @@
1313
// This file contains non-API (or underscored) declarations that are needed to
1414
// be kept around for ABI compatibility
1515

16+
extension Unicode.UTF16 {
17+
@available(*, unavailable, renamed: "Unicode.UTF16.isASCII")
18+
@inlinable
19+
public static func _isASCII(_ x: CodeUnit) -> Bool {
20+
return Unicode.UTF16.isASCII(x)
21+
}
22+
}
23+
24+
@available(*, unavailable, renamed: "Unicode.UTF8.isASCII")
25+
@inlinable
26+
internal func _isASCII(_ x: UInt8) -> Bool {
27+
return Unicode.UTF8.isASCII(x)
28+
}
29+
30+
@available(*, unavailable, renamed: "Unicode.UTF8.isContinuation")
31+
@inlinable
32+
internal func _isContinuation(_ x: UInt8) -> Bool {
33+
return UTF8.isContinuation(x)
34+
}
35+

stdlib/public/core/StringComparison.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ private func _findBoundary(
239239
}
240240

241241
// Back up to scalar boundary
242-
while _isContinuation(utf8[_unchecked: idx]) {
242+
while UTF8.isContinuation(utf8[_unchecked: idx]) {
243243
idx &-= 1
244244
}
245245

stdlib/public/core/StringNormalization.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
108108
if index == 0 || index == count {
109109
return true
110110
}
111-
assert(!_isContinuation(self[_unchecked: index]))
111+
assert(!UTF8.isContinuation(self[_unchecked: index]))
112112

113113
// Sub-300 latiny fast-path
114114
if self[_unchecked: index] < 0xCC { return true }
@@ -165,7 +165,7 @@ extension UnsafeBufferPointer where Element == UInt8 {
165165
_internalInvariant(index == count)
166166
return true
167167
}
168-
return !_isContinuation(self[index])
168+
return !UTF8.isContinuation(self[index])
169169
}
170170

171171
}

stdlib/public/core/StringUTF8Validation.swift

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,15 @@ private func _isNotOverlong_F0(_ x: UInt8) -> Bool {
77
}
88

99
private func _isNotOverlong_F4(_ x: UInt8) -> Bool {
10-
return _isContinuation(x) && x <= 0x8F
10+
return UTF8.isContinuation(x) && x <= 0x8F
1111
}
1212

1313
private func _isNotOverlong_E0(_ x: UInt8) -> Bool {
1414
return (0xA0...0xBF).contains(x)
1515
}
1616

1717
private func _isNotOverlong_ED(_ x: UInt8) -> Bool {
18-
return _isContinuation(x) && x <= 0x9F
19-
}
20-
21-
private func _isASCII_cmp(_ x: UInt8) -> Bool {
22-
return x <= 0x7F
18+
return UTF8.isContinuation(x) && x <= 0x9F
2319
}
2420

2521
internal struct UTF8ExtraInfo: Equatable {
@@ -48,7 +44,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
4844
guard f(cu) else { throw UTF8ValidationError() }
4945
}
5046
@inline(__always) func guaranteeContinuation() throws {
51-
try guaranteeIn(_isContinuation)
47+
try guaranteeIn(UTF8.isContinuation)
5248
}
5349

5450
func _legacyInvalidLengthCalculation(_ _buffer: (_storage: UInt32, ())) -> Int {
@@ -94,7 +90,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
9490
var endIndex = buf.startIndex
9591
var iter = buf.makeIterator()
9692
_ = iter.next()
97-
while let cu = iter.next(), !_isASCII(cu) && !_isUTF8MultiByteLeading(cu) {
93+
while let cu = iter.next(), UTF8.isContinuation(cu) {
9894
endIndex += 1
9995
}
10096
let illegalRange = Range(buf.startIndex...endIndex)
@@ -107,7 +103,7 @@ internal func validateUTF8(_ buf: UnsafeBufferPointer<UInt8>) -> UTF8ValidationR
107103
do {
108104
var isASCII = true
109105
while let cu = iter.next() {
110-
if _isASCII(cu) { lastValidIndex &+= 1; continue }
106+
if UTF8.isASCII(cu) { lastValidIndex &+= 1; continue }
111107
isASCII = false
112108
if _slowPath(!_isUTF8MultiByteLeading(cu)) {
113109
throw UTF8ValidationError()

stdlib/public/core/StringUTF8View.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ extension String.UTF8View {
415415

416416
let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
417417
startingAt: i.strippingTranscoding)
418-
let utf8Len = _numUTF8CodeUnits(scalar)
418+
let utf8Len = UTF8.width(scalar)
419419

420420
if utf8Len == 1 {
421421
_internalInvariant(i.transcodedOffset == 0)
@@ -442,7 +442,7 @@ extension String.UTF8View {
442442

443443
let (scalar, scalarLen) = _guts.foreignErrorCorrectedScalar(
444444
endingAt: i)
445-
let utf8Len = _numUTF8CodeUnits(scalar)
445+
let utf8Len = UTF8.width(scalar)
446446
return i.encoded(offsetBy: -scalarLen).transcoded(withOffset: utf8Len &- 1)
447447
}
448448

stdlib/public/core/StringUnicodeScalarView.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ extension String.UnicodeScalarView {
418418
internal func _foreignIndex(after i: Index) -> Index {
419419
_internalInvariant(_guts.isForeign)
420420
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: i)
421-
let len = _isLeadingSurrogate(cu) ? 2 : 1
421+
let len = UTF16.isLeadSurrogate(cu) ? 2 : 1
422422

423423
return i.encoded(offsetBy: len)
424424
}
@@ -429,7 +429,7 @@ extension String.UnicodeScalarView {
429429
_internalInvariant(_guts.isForeign)
430430
let priorIdx = i.priorEncoded
431431
let cu = _guts.foreignErrorCorrectedUTF16CodeUnit(at: priorIdx)
432-
let len = _isTrailingSurrogate(cu) ? 2 : 1
432+
let len = UTF16.isTrailSurrogate(cu) ? 2 : 1
433433

434434
return i.encoded(offsetBy: -len)
435435
}

0 commit comments

Comments
 (0)