|
13 | 13 |
|
14 | 14 | // MARK: - Private extensions for parsing encoding names |
15 | 15 |
|
16 | | -private extension Unicode.Scalar { |
17 | | - /// Returns the Boolean value that indicates whether or not `self` is "ASCII whitespace". |
18 | | - /// |
19 | | - /// Reference: https://infra.spec.whatwg.org/#ascii-whitespace |
20 | | - var _isASCIIWhitespace: Bool { |
21 | | - switch self.value { |
22 | | - case 0x09, 0x0A, 0x0C, 0x0D, 0x20: true |
| 16 | +private extension UTF8.CodeUnit { |
| 17 | + func _isASCIICaseinsensitivelyEqual(to other: UTF8.CodeUnit) -> Bool { |
| 18 | + return switch self { |
| 19 | + case other, other._uppercased, other._lowercased: true |
23 | 20 | default: false |
24 | 21 | } |
25 | 22 | } |
26 | 23 | } |
27 | 24 |
|
28 | 25 | private extension String { |
29 | | - var _trimmed: Substring.UnicodeScalarView { |
30 | | - let scalars = self.unicodeScalars |
31 | | - let isNonWhitespace: (Unicode.Scalar) -> Bool = { !$0._isASCIIWhitespace } |
32 | | - guard let firstIndexOfNonWhitespace = scalars.firstIndex(where: isNonWhitespace), |
33 | | - let lastIndexOfNonWhitespace = scalars.lastIndex(where: isNonWhitespace) else { |
34 | | - return Substring.UnicodeScalarView() |
35 | | - } |
36 | | - return scalars[firstIndexOfNonWhitespace...lastIndexOfNonWhitespace] |
37 | | - } |
38 | | -} |
39 | | - |
40 | | -/// A type that holds a `Unicode.Scalar` where its value is compared case-insensitively with others' |
41 | | -/// _if the value is within ASCII range_. |
42 | | -private struct ASCIICaseInsensitiveUnicodeScalar: Equatable, |
43 | | - ExpressibleByUnicodeScalarLiteral { |
44 | | - typealias UnicodeScalarLiteralType = Unicode.Scalar.UnicodeScalarLiteralType |
45 | | - |
46 | | - let scalar: Unicode.Scalar |
47 | | - |
48 | | - init(_ scalar: Unicode.Scalar) { |
49 | | - assert(scalar.isASCII) |
50 | | - self.scalar = scalar |
51 | | - } |
52 | | - |
53 | | - init(unicodeScalarLiteral value: Unicode.Scalar.UnicodeScalarLiteralType) { |
54 | | - self.init(Unicode.Scalar(unicodeScalarLiteral: value)) |
55 | | - } |
56 | | - |
57 | | - static func ==( |
58 | | - lhs: ASCIICaseInsensitiveUnicodeScalar, |
59 | | - rhs: ASCIICaseInsensitiveUnicodeScalar |
60 | | - ) -> Bool { |
61 | | - if lhs.scalar == rhs.scalar { |
62 | | - return true |
63 | | - } else if ("A"..."Z").contains(lhs.scalar) { |
64 | | - return lhs.scalar.value + 0x20 == rhs.scalar.value |
65 | | - } else if ("a"..."z").contains(lhs.scalar) { |
66 | | - return lhs.scalar.value - 0x20 == rhs.scalar.value |
67 | | - } |
68 | | - return false |
69 | | - } |
70 | | -} |
71 | | - |
72 | | -/// A type to tokenize string for `String.Encoding` names. |
73 | | -internal protocol StringEncodingNameTokenizer: ~Copyable { |
74 | | - associatedtype Token: Equatable |
75 | | - init(name: String) |
76 | | - mutating func nextToken() throws -> Token? |
77 | | -} |
78 | | - |
79 | | -extension StringEncodingNameTokenizer where Self: ~Copyable { |
80 | | - mutating func hasEqualTokens(with other: consuming Self) throws -> Bool { |
81 | | - while let myToken = try self.nextToken() { |
82 | | - guard let otherToken = try other.nextToken(), |
83 | | - myToken == otherToken else { |
| 26 | + func _isASCIICaseinsensitivelyEqual(to other: String) -> Bool { |
| 27 | + let (myUTF8, otherUTF8) = (self.utf8, other.utf8) |
| 28 | + var (myIndex, otherIndex) = (myUTF8.startIndex, otherUTF8.startIndex) |
| 29 | + while myIndex < myUTF8.endIndex && otherIndex < otherUTF8.endIndex { |
| 30 | + guard myUTF8[myIndex]._isASCIICaseinsensitivelyEqual(to: otherUTF8[otherIndex]) else { |
84 | 31 | return false |
85 | 32 | } |
86 | | - } |
87 | | - return try other.nextToken() == nil |
88 | | - } |
89 | | -} |
90 | | - |
91 | | - |
92 | | -/// A parser that tokenizes a string into `ASCIICaseInsensitiveUnicodeScalar`s. |
93 | | -private struct ASCIICaseInsensitiveTokenizer: StringEncodingNameTokenizer, ~Copyable { |
94 | | - typealias Token = ASCIICaseInsensitiveUnicodeScalar |
95 | 33 |
|
96 | | - enum Error: Swift.Error { |
97 | | - case nonASCII |
98 | | - } |
99 | | - |
100 | | - let scalars: Substring.UnicodeScalarView |
101 | | - |
102 | | - var _currentIndex: Substring.UnicodeScalarView.Index |
103 | | - |
104 | | - init(name: String) { |
105 | | - self.scalars = name._trimmed |
106 | | - self._currentIndex = scalars.startIndex |
107 | | - } |
108 | | - |
109 | | - mutating func nextToken() throws -> Token? { |
110 | | - guard _currentIndex < scalars.endIndex else { |
111 | | - return nil |
112 | | - } |
113 | | - let scalar = scalars[_currentIndex] |
114 | | - guard scalar.isASCII else { throw Error.nonASCII } |
115 | | - defer { |
116 | | - scalars.formIndex(after: &_currentIndex) |
117 | | - } |
118 | | - return ASCIICaseInsensitiveUnicodeScalar(scalar) |
119 | | - } |
120 | | -} |
121 | | - |
122 | | - |
123 | | -private extension String { |
124 | | - func isEqual<T>( |
125 | | - to other: String, |
126 | | - tokenizedBy tokenizer: T.Type |
127 | | - ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable { |
128 | | - do { |
129 | | - var myTokenizer = T(name: self) |
130 | | - let otherTokenizer = T(name: other) |
131 | | - return try myTokenizer.hasEqualTokens(with: otherTokenizer) |
132 | | - } catch { |
133 | | - // Any errors imply that `self` or `other` contains invalid characters. |
134 | | - return false |
| 34 | + myUTF8.formIndex(after: &myIndex) |
| 35 | + otherUTF8.formIndex(after: &otherIndex) |
135 | 36 | } |
| 37 | + return myIndex == myUTF8.endIndex && otherIndex == otherUTF8.endIndex |
136 | 38 | } |
137 | 39 | } |
138 | 40 |
|
@@ -160,19 +62,16 @@ internal struct IANACharset { |
160 | 62 | self.aliases = aliases |
161 | 63 | } |
162 | 64 |
|
163 | | - func matches<T>( |
164 | | - _ string: String, |
165 | | - tokenizedBy tokenizer: T.Type |
166 | | - ) -> Bool where T: StringEncodingNameTokenizer, T: ~Copyable { |
| 65 | + func matches(_ string: String) -> Bool { |
167 | 66 | if let preferredMIMEName = self.preferredMIMEName, |
168 | | - preferredMIMEName.isEqual(to: string, tokenizedBy: tokenizer) { |
| 67 | + preferredMIMEName._isASCIICaseinsensitivelyEqual(to: string) { |
169 | 68 | return true |
170 | 69 | } |
171 | | - if name.isEqual(to: string, tokenizedBy: tokenizer) { |
| 70 | + if name._isASCIICaseinsensitivelyEqual(to: string) { |
172 | 71 | return true |
173 | 72 | } |
174 | 73 | for alias in aliases { |
175 | | - if alias.isEqual(to: string, tokenizedBy: tokenizer) { |
| 74 | + if alias._isASCIICaseinsensitivelyEqual(to: string) { |
176 | 75 | return true |
177 | 76 | } |
178 | 77 | } |
@@ -249,7 +148,7 @@ extension String.Encoding { |
249 | 148 | guard let ianaCharset = encoding._ianaCharset else { |
250 | 149 | continue |
251 | 150 | } |
252 | | - if ianaCharset.matches(charsetName, tokenizedBy: ASCIICaseInsensitiveTokenizer.self) { |
| 151 | + if ianaCharset.matches(charsetName) { |
253 | 152 | return encoding |
254 | 153 | } |
255 | 154 | } |
|
0 commit comments