Skip to content

Commit 09a0524

Browse files
authored
Enable string conversion in EUC-JP. (#1296)
* Enable string conversion in EUC-JP.
  Background: EUC-JP is not supported by OSS CoreFoundation, while it is supported by the macOS Foundation framework. See #1016. This commit resolves the issue by calling the ICU API when necessary.
* ICU: Omit encodings that should be supported by FoundationEssentials. In response to: #1296 (comment)
* ICU: Remove unnecessary `nonisolated(unsafe)` from static property. In response to: #1296 (comment)
* Add comment to `func _icuMakeStringFromBytes_impl`. In response to: #1296 (comment)
* Delegate string conversion to ICU only when encoding is EUC-JP. In response to: #1296 (comment)
* Replace dynamic `_icu*` functions only if `!FOUNDATION_FRAMEWORK`. In response to: #1296 (comment)
* Divide test cases depending on `FOUNDATION_FRAMEWORK` for EUC_JP conversion. In response to: #1296 (comment)
1 parent 22142cf commit 09a0524

File tree

4 files changed

+373
-0
lines changed

4 files changed

+373
-0
lines changed

Sources/FoundationEssentials/String/String+IO.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ dynamic public func _cfMakeStringFromBytes(_ bytes: UnsafeBufferPointer<UInt8>,
2424
// Provide swift-corelibs-foundation with an entry point to convert some bytes into a String
2525
return nil
2626
}
27+
28+
/// Entry point for decoding `bytes` in `encoding` through ICU.
///
/// This default body performs no conversion and always returns `nil`.
/// The function is declared `dynamic` so that another module can substitute
/// a real implementation with `@_dynamicReplacement`.
///
/// - Parameters:
///   - bytes: The encoded bytes to convert.
///   - encoding: The encoding in which `bytes` should be interpreted.
/// - Returns: The decoded string, or `nil` when no conversion was performed.
dynamic package func _icuMakeStringFromBytes(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
29+
// Concrete implementation is provided by FoundationInternationalization.
30+
return nil
31+
}
2732
#endif
2833

2934
@available(macOS 10.10, iOS 8.0, watchOS 2.0, tvOS 9.0, *)
@@ -184,6 +189,17 @@ extension String {
184189
}
185190
}
186191
self = bytes.withContiguousStorageIfAvailable(buildString) ?? Array(bytes).withUnsafeBufferPointer(buildString)
192+
case .japaneseEUC:
193+
// Here we catch encodings that are supported by Foundation Framework
194+
// but are not supported by corelibs-foundation.
195+
// We delegate conversion to ICU.
196+
guard let string = (
197+
bytes.withContiguousStorageIfAvailable({ _icuMakeStringFromBytes($0, encoding: encoding) }) ??
198+
Array(bytes).withUnsafeBufferPointer({ _icuMakeStringFromBytes($0, encoding: encoding) })
199+
) else {
200+
return nil
201+
}
202+
self = string
187203
#endif
188204
default:
189205
#if FOUNDATION_FRAMEWORK

Sources/FoundationEssentials/String/StringProtocol+Essentials.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ dynamic public func _cfStringEncodingConvert(string: String, using encoding: UIn
9191
// Dynamically replaced by swift-corelibs-foundation to implement encodings that we do not have Swift replacements for, yet
9292
return nil
9393
}
94+
95+
/// Entry point for encoding `string` into `encoding` through ICU.
///
/// This default body performs no conversion and always returns `nil`.
/// The function is declared `dynamic` so that another module can substitute
/// a real implementation with `@_dynamicReplacement`.
///
/// - Parameters:
///   - string: The string to encode.
///   - encoding: The target encoding.
///   - allowLossyConversion: Whether unrepresentable characters may be substituted.
/// - Returns: The encoded bytes, or `nil` when no conversion was performed.
dynamic package func _icuStringEncodingConvert(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
96+
// Concrete implementation is provided by FoundationInternationalization.
97+
return nil
98+
}
9499
#endif
95100

96101
@available(FoundationPreview 0.4, *)
@@ -249,6 +254,11 @@ extension String {
249254
buffer.appendElement(value)
250255
}
251256
}
257+
case .japaneseEUC:
258+
// Here we catch encodings that are supported by Foundation Framework
259+
// but are not supported by corelibs-foundation.
260+
// We delegate conversion to ICU.
261+
return _icuStringEncodingConvert(string: self, using: encoding, allowLossyConversion: allowLossyConversion)
252262
#endif
253263
default:
254264
#if FOUNDATION_FRAMEWORK
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#if canImport(FoundationEssentials)
14+
import FoundationEssentials
15+
#endif
16+
internal import _FoundationICU
17+
18+
private extension String.Encoding {
    /// The converter name handed to ICU's `ucnv_open` for this encoding,
    /// or `nil` when the encoding is not routed through ICU.
    var _icuConverterName: String? {
        // TODO: Replace this with forthcoming(?) public property such as https://github.com/swiftlang/swift-foundation/pull/1243
        // Note: UTF-* and US-ASCII are omitted here because they are supposed to be converted upstream.
        switch self {
        // Japanese
        case .japaneseEUC:
            return "EUC-JP"
        case .shiftJIS:
            return "Shift_JIS"
        case .iso2022JP:
            return "ISO-2022-JP"

        // ISO 8859
        case .isoLatin1:
            return "ISO-8859-1"
        case .isoLatin2:
            return "ISO-8859-2"

        // Windows code pages
        case .windowsCP1250:
            return "windows-1250"
        case .windowsCP1251:
            return "windows-1251"
        case .windowsCP1252:
            return "windows-1252"
        case .windowsCP1253:
            return "windows-1253"
        case .windowsCP1254:
            return "windows-1254"

        // Classic Mac OS
        case .macOSRoman:
            return "macintosh"

        default:
            return nil
        }
    }
}
38+
39+
extension ICU {
    /// Owns a single ICU converter (`UConverter*`) for one `String.Encoding`.
    ///
    /// The raw converter pointer is stored inside a `LockedState` and is only
    /// touched from within `withLock`, which is what the `@unchecked Sendable`
    /// conformance relies on.
    final class StringConverter: @unchecked Sendable {
        /// The `UConverter*` handle opened in `init?(encoding:)` and closed in `deinit`.
        private let _converter: LockedState<OpaquePointer>

        /// The encoding this converter was opened for.
        let encoding: String.Encoding

        /// Opens an ICU converter for `encoding`.
        ///
        /// Fails when the encoding has no ICU converter name or when
        /// `ucnv_open` does not yield a usable converter.
        init?(encoding: String.Encoding) {
            guard let icuName = encoding._icuConverterName else {
                return nil
            }

            var openStatus: UErrorCode = U_ZERO_ERROR
            let opened = ucnv_open(icuName, &openStatus)
            guard let handle = opened, openStatus.isSuccess else {
                return nil
            }

            self.encoding = encoding
            self._converter = LockedState(initialState: handle)
        }

        deinit {
            _converter.withLock { handle in
                ucnv_close(handle)
            }
        }
    }
}
62+
63+
extension ICU.StringConverter {
    /// Decodes `data`, interpreted in this converter's encoding, into a `String`.
    ///
    /// - Parameter data: The encoded bytes.
    /// - Returns: The decoded string, or `nil` when the input is too large for
    ///   ICU's 32-bit lengths or when the conversion does not produce a string.
    func decode(data: Data) -> String? {
        // ICU takes 32-bit lengths; reject oversized input instead of trapping.
        guard let srcLength = CInt(exactly: data.count) else {
            return nil
        }

        return _converter.withLock { (converter) -> String? in
            defer {
                // Leave the shared converter in a clean state for the next caller.
                ucnv_resetToUnicode(converter)
            }

            // Initial guess for the destination size, computed without
            // trapping on arithmetic overflow.
            let minCharSize = CInt(ucnv_getMinCharSize(converter))
            let (scaled, scaleOverflow) = srcLength.multipliedReportingOverflow(by: minCharSize)
            let (initCapacity, addOverflow) = scaled.addingReportingOverflow(1)
            guard !scaleOverflow, !addOverflow else {
                return nil
            }

            // `_withResizingUCharBuffer` is defined elsewhere in the module;
            // presumably it retries with a larger buffer on overflow — TODO confirm.
            return _withResizingUCharBuffer(initialSize: initCapacity) { (dest, capacity, status) in
                return data.withUnsafeBytes { src in
                    ucnv_toUChars(
                        converter,
                        dest,
                        capacity,
                        src.baseAddress,
                        srcLength,
                        &status
                    )
                }
            }
        }
    }

    /// Encodes `string` into this converter's encoding.
    ///
    /// - Parameters:
    ///   - string: The string to encode.
    ///   - lossy: When `true`, characters that cannot be represented are
    ///     replaced by a substitution character; when `false`, the conversion
    ///     stops at the first such character and `nil` is returned.
    /// - Returns: The encoded bytes, or `nil` when the conversion fails or the
    ///   input is too large for ICU's 32-bit lengths.
    func encode(string: String, allowLossyConversion lossy: Bool) -> Data? {
        return _converter.withLock { (converter) -> Data? in
            defer {
                // Leave the shared converter in a clean state for the next caller.
                ucnv_resetFromUnicode(converter)
            }

            // Materialize the UTF-16 code units in contiguous memory for ICU.
            let utf16Rep = string.utf16
            let uchars = UnsafeMutableBufferPointer<UChar>.allocate(capacity: utf16Rep.count)
            _ = uchars.initialize(fromContentsOf: utf16Rep)
            defer {
                uchars.deallocate()
            }

            let srcLength = uchars.count
            let capacity = srcLength * Int(ucnv_getMaxCharSize(converter)) + 1

            // ICU takes 32-bit lengths; reject oversized input instead of trapping.
            guard let icuSrcLength = CInt(exactly: srcLength),
                  let icuCapacity = CInt(exactly: capacity) else {
                return nil
            }

            // Configure how unrepresentable characters are handled. This runs
            // before the destination buffer is allocated, so the early returns
            // below have nothing to clean up.
            var status: UErrorCode = U_ZERO_ERROR
            if lossy {
                // 0x3F is `?`.
                var lossyChar: UChar = encoding == .ascii ? 0xFF : 0x3F
                ucnv_setSubstString(
                    converter,
                    &lossyChar,
                    1,
                    &status
                )
                guard status.isSuccess else { return nil }

                ucnv_setFromUCallBack(
                    converter,
                    UCNV_FROM_U_CALLBACK_SUBSTITUTE,
                    nil, // newContext
                    nil, // oldAction
                    nil, // oldContext
                    &status
                )
                guard status.isSuccess else { return nil }
            } else {
                ucnv_setFromUCallBack(
                    converter,
                    UCNV_FROM_U_CALLBACK_STOP,
                    nil, // newContext
                    nil, // oldAction
                    nil, // oldContext
                    &status
                )
                guard status.isSuccess else { return nil }
            }

            let dest = UnsafeMutableRawPointer.allocate(
                byteCount: capacity,
                alignment: MemoryLayout<CChar>.alignment
            )

            let actualLength = ucnv_fromUChars(
                converter,
                dest,
                icuCapacity,
                uchars.baseAddress,
                icuSrcLength,
                &status
            )
            guard status.isSuccess else {
                // Ownership was never handed to `Data`, so free the buffer here.
                dest.deallocate()
                return nil
            }

            // `Data` takes ownership of `dest` and frees it via the custom deallocator.
            return Data(
                bytesNoCopy: dest,
                count: Int(actualLength),
                deallocator: .custom({ pointer, _ in pointer.deallocate() })
            )
        }
    }
}
156+
157+
extension ICU.StringConverter {
    /// Converters created so far, keyed by encoding and guarded by a lock.
    private static let _converters = LockedState<[String.Encoding: ICU.StringConverter]>(initialState: [:])

    /// Returns the converter for `encoding`, creating and remembering it on first use.
    ///
    /// - Returns: `nil` when no converter can be created for `encoding`;
    ///   nothing is stored in that case.
    static func converter(for encoding: String.Encoding) -> ICU.StringConverter? {
        return _converters.withLock { (cache) -> ICU.StringConverter? in
            if let cached = cache[encoding] {
                return cached
            }
            guard let created = ICU.StringConverter(encoding: encoding) else {
                return nil
            }
            cache[encoding] = created
            return created
        }
    }
}
173+
174+
175+
#if !FOUNDATION_FRAMEWORK
176+
/// ICU-backed replacement for `_icuMakeStringFromBytes(_:encoding:)`.
///
/// - Returns: The decoded string, or `nil` when no converter exists for
///   `encoding`, when `bytes` has no base address, or when decoding fails.
@_dynamicReplacement(for: _icuMakeStringFromBytes(_:encoding:))
func _icuMakeStringFromBytes_impl(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
    guard let converter = ICU.StringConverter.converter(for: encoding) else {
        return nil
    }
    guard let base = bytes.baseAddress else {
        return nil
    }

    // Wrap the caller's bytes in a `Data` without copying them.
    // `Data(bytesNoCopy:count:deallocator:)` only accepts a mutable raw
    // pointer, hence the `mutating:` cast. This is still safe because
    // `ICU.StringConverter.decode(data:)` merely borrows the storage: it
    // neither mutates it nor lets it escape, and `.none` keeps `Data` from
    // freeing memory it does not own.
    // `Data` is also convenient here since it is `Sendable` (and has CoW behavior).
    let borrowed = Data(
        bytesNoCopy: UnsafeMutableRawPointer(mutating: base),
        count: bytes.count,
        deallocator: .none
    )
    return converter.decode(data: borrowed)
}
198+
199+
/// ICU-backed replacement for `_icuStringEncodingConvert(string:using:allowLossyConversion:)`.
///
/// - Returns: The encoded bytes, or `nil` when no converter exists for
///   `encoding` or when encoding fails.
@_dynamicReplacement(for: _icuStringEncodingConvert(string:using:allowLossyConversion:))
func _icuStringEncodingConvert_impl(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
    // A missing converter short-circuits the chain to `nil`.
    return ICU.StringConverter
        .converter(for: encoding)?
        .encode(string: string, allowLossyConversion: allowLossyConversion)
}
206+
#endif

0 commit comments

Comments
 (0)