Skip to content

Commit 0045a48

Browse files
committed
Rewrite script in Swift instead of Python.
In response to: #1286 (comment)
1 parent c7bdbef commit 0045a48

File tree

3 files changed

+196
-175
lines changed

3 files changed

+196
-175
lines changed

utils/update-iana-charset-names

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ declare generatedCode
5353
generatedCode=$(
5454
echo "${swiftLicenseHeader##$'\n'}"
5555
echo "$warningComment"
56-
python3 "${utilsDir}/${commandName}-impl.py"
56+
swift -D PRINT_CODE "${utilsDir}/${commandName}-impl.swift"
5757
)
5858

5959
echo "Writing the code to '${targetSwiftFileRelativePath}'..." 1>&2

utils/update-iana-charset-names-impl.py

Lines changed: 0 additions & 174 deletions
This file was deleted.
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/usr/bin/env swift
2+
//===----------------------------------------------------------------------===//
3+
//
4+
// This source file is part of the Swift.org open source project
5+
//
6+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
7+
// Licensed under Apache License v2.0 with Runtime Library Exception
8+
//
9+
// See https://swift.org/LICENSE.txt for license information
10+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
/*
15+
16+
This is a Swift script that converts an XML file containing the list of IANA
"Character Sets" to Swift source code.
This script generates minimal code and is intended to be executed by another shell
script.
20+
21+
*/
22+
23+
import Foundation
24+
#if canImport(FoundationXML)
25+
import FoundationXML
26+
#endif
27+
28+
// MARK: - Constants
29+
30+
/// Representative names (preferred MIME name, or plain name when there is none)
/// of the charsets for which Swift code is generated.
let requiredCharsetNames: [String] = [
  "UTF-8", "US-ASCII", "EUC-JP", "ISO-8859-1", "Shift_JIS", "ISO-8859-2",
  "UTF-16",
  "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1250",
  "ISO-2022-JP", "macintosh",
  "UTF-16BE", "UTF-16LE",
  "UTF-32", "UTF-32BE", "UTF-32LE",
]

/// Location of the IANA "Character Sets" registry in XML form.
/// The force unwrap is applied to a constant URL string literal.
let charsetsXMLURL: URL = URL(
  string: "https://www.iana.org/assignments/character-sets/character-sets.xml"
)!

/// Namespace URI associated with the charset XML.
/// NOTE(review): not referenced elsewhere in the visible part of this script.
let charsetsXMLNamespace: String = "http://www.iana.org/assignments"

/// One level of indentation used when emitting Swift code.
/// NOTE(review): confirm the width matches the style of the generated file.
let swiftCodeIndent: String = " "
56+
57+
58+
// MARK: - Implementation
59+
60+
/// Failures that can occur while converting the charset XML into Swift code.
enum CodeGenerationError: Swift.Error {
  /// A `<record>` element has no `<name>` child with a string value.
  case missingName

  /// An `<alias>` element has no string value.
  case missingAliasValue

  /// The XML document has no root element.
  case noRootElement
}
65+
66+
/// Representation of a `<record>` element in 'character-sets.xml'.
///
/// The structure of a `<record>` element is as below:
/// ```xml
/// <record>
///   <name>US-ASCII</name>
///   <xref type="rfc" data="rfc2046"/>
///   <value>3</value>
///   <description>ANSI X3.4-1986</description>
///   <alias>iso-ir-6</alias>
///   <alias>ANSI_X3.4-1968</alias>
///   <alias>ANSI_X3.4-1986</alias>
///   <alias>ISO_646.irv:1991</alias>
///   <alias>ISO646-US</alias>
///   <alias>US-ASCII</alias>
///   <alias>us</alias>
///   <alias>IBM367</alias>
///   <alias>cp367</alias>
///   <alias>csASCII</alias>
///   <preferred_alias>US-ASCII</preferred_alias>
/// </record>
/// ```
struct IANACharsetNameRecord {
  /// Preferred MIME Name (the `<preferred_alias>` child), if any.
  let preferredMIMEName: String?

  /// The name of this charset (the `<name>` child).
  let name: String

  /// The aliases of this charset (the `<alias>` children, in document order).
  let aliases: [String]

  /// The preferred MIME name when there is one; otherwise the plain name.
  var representativeName: String {
    return preferredMIMEName ?? name
  }

  /// Lines of Swift source declaring a `static let` that constructs an
  /// `IANACharset` from this record.
  ///
  /// The constant is named after the camel-cased `representativeName`.
  /// Values taken from the XML are emitted as escaped string literals so that
  /// a quote or backslash in the registry cannot break the generated code.
  var swiftCodeLines: [String] {
    let preferredMIMENameLiteral = preferredMIMEName.map(Self.swiftStringLiteral(for:)) ?? "nil"

    var lines: [String] = []
    lines.append("/// IANA Charset `\(representativeName)`.")
    lines.append("static let \(representativeName._camelcased()) = IANACharset(")
    lines.append("\(swiftCodeIndent)preferredMIMEName: \(preferredMIMENameLiteral),")
    lines.append("\(swiftCodeIndent)name: \(Self.swiftStringLiteral(for: name)),")
    lines.append("\(swiftCodeIndent)aliases: [")
    for alias in aliases {
      lines.append("\(swiftCodeIndent)\(swiftCodeIndent)\(Self.swiftStringLiteral(for: alias)),")
    }
    lines.append("\(swiftCodeIndent)]")
    lines.append(")")
    return lines
  }

  /// Creates a record from a `<record>` XML node.
  ///
  /// - Parameter node: The node whose `name`, `preferred_alias` and `alias`
  ///   children are looked up via XPath.
  /// - Throws: `CodeGenerationError.missingName` when there is no `name` child
  ///   with a string value, `CodeGenerationError.missingAliasValue` when an
  ///   `alias` child has no string value, or any error thrown by
  ///   `nodes(forXPath:)`.
  init(_ node: XMLNode) throws {
    guard let name = try node.nodes(forXPath: "./name").first?.stringValue else {
      throw CodeGenerationError.missingName
    }
    self.name = name
    self.preferredMIMEName = try node.nodes(forXPath: "./preferred_alias").first?.stringValue
    self.aliases = try node.nodes(forXPath: "./alias").map { aliasNode in
      guard let alias = aliasNode.stringValue else {
        throw CodeGenerationError.missingAliasValue
      }
      return alias
    }
  }

  /// Returns `value` wrapped in double quotes as a Swift string literal,
  /// escaping the characters that would otherwise terminate or alter it.
  private static func swiftStringLiteral(for value: String) -> String {
    var literal = "\""
    for scalar in value.unicodeScalars {
      switch scalar {
      case "\\": literal += "\\\\"
      case "\"": literal += "\\\""
      case "\n": literal += "\\n"
      case "\r": literal += "\\r"
      case "\t": literal += "\\t"
      default: literal.unicodeScalars.append(scalar)
      }
    }
    literal += "\""
    return literal
  }
}
131+
132+
/// Loads the XML at `charsetsXMLURL` and renders an `extension IANACharset`
/// containing one declaration per required charset.
///
/// - Returns: The Swift source text of the extension, ending with a newline.
/// - Throws: `CodeGenerationError.noRootElement` when the document has no root
///   element, plus any error thrown while loading the document, evaluating the
///   XPath query, or constructing an `IANACharsetNameRecord`.
func generateSwiftCode() throws -> String {
  let document = try XMLDocument(contentsOf: charsetsXMLURL)
  guard let root = document.rootElement() else {
    throw CodeGenerationError.noRootElement
  }

  // Every record is parsed before filtering, so a malformed record aborts
  // generation even when it is not one of the required charsets.
  let allRecords = try root
    .nodes(forXPath: "./registry/record")
    .map { try IANACharsetNameRecord($0) }

  // Each selected record becomes one block of indented lines.
  let declarationBlocks = allRecords
    .filter { requiredCharsetNames.contains($0.representativeName) }
    .map { record in
      record.swiftCodeLines
        .map { swiftCodeIndent + $0 }
        .joined(separator: "\n")
    }

  // A newline precedes and follows every block, which leaves one blank line
  // between consecutive declarations.
  let extensionBody = declarationBlocks
    .map { "\n" + $0 + "\n" }
    .joined()

  return "extension IANACharset {" + extensionBody + "}\n"
}
152+
153+
#if PRINT_CODE
// Compiled in only when the PRINT_CODE flag is defined (for example with
// `swift -D PRINT_CODE`): writes the generated code to standard output.
// An error thrown by generateSwiftCode() propagates out of the script's top level.
try print(generateSwiftCode())
#endif
156+
157+
// MARK: - Extensions
158+
159+
extension UTF8.CodeUnit {
  /// Whether this code unit is an ASCII digit (`0` through `9`).
  var _isASCIINumeric: Bool {
    return UInt8(ascii: "0") <= self && self <= UInt8(ascii: "9")
  }

  /// Whether this code unit is an ASCII uppercase letter (`A` through `Z`).
  var _isASCIIUppercase: Bool {
    return UInt8(ascii: "A") <= self && self <= UInt8(ascii: "Z")
  }

  /// Whether this code unit is an ASCII lowercase letter (`a` through `z`).
  var _isASCIILowercase: Bool {
    return UInt8(ascii: "a") <= self && self <= UInt8(ascii: "z")
  }
}
164+
165+
extension String {
  /// Builds a camel-cased identifier from the ASCII alphanumeric runs of this
  /// string.
  ///
  /// Every byte that is not an ASCII letter or digit separates words. The
  /// first word is lowercased. Each following word is kept verbatim when the
  /// part after its leading digits is non-empty and entirely ASCII uppercase;
  /// otherwise it is passed through `capitalized(with: nil)`. An underscore is
  /// inserted between two words when the earlier one ends with a digit and the
  /// later one starts with a digit.
  ///
  /// For example, `"UTF-8"` becomes `"utf8"`, `"ISO-8859-1"` becomes
  /// `"iso8859_1"`, and `"UTF-16BE"` becomes `"utf16BE"`.
  func _camelcased() -> String {
    // `split` drops empty runs by default, so every word has at least one byte.
    let words = utf8.split { byte in
      !(byte._isASCIINumeric || byte._isASCIIUppercase || byte._isASCIILowercase)
    }

    var identifier = ""
    for (position, bytes) in words.enumerated() {
      let word = String(Substring(bytes))

      if position == 0 {
        identifier += word.lowercased()
        continue
      }

      // Keep adjacent numeric parts apart, e.g. "8859" and "1".
      if let previousLast = words[position - 1].last,
         let currentFirst = bytes.first,
         previousLast._isASCIINumeric,
         currentFirst._isASCIINumeric {
        identifier += "_"
      }

      // Preserve acronym-like words such as "JIS" or "16BE" as they are.
      let afterLeadingDigits = bytes.drop(while: { $0._isASCIINumeric })
      let isAcronymLike = !afterLeadingDigits.isEmpty
        && afterLeadingDigits.allSatisfy({ $0._isASCIIUppercase })
      identifier += isAcronymLike ? word : word.capitalized(with: nil)
    }
    return identifier
  }
}

0 commit comments

Comments
 (0)