| 
 | 1 | +//===----------------------------------------------------------------------===//  | 
 | 2 | +//  | 
 | 3 | +// This source file is part of the Swift.org open source project  | 
 | 4 | +//  | 
 | 5 | +// Copyright (c) 2025 Apple Inc. and the Swift project authors  | 
 | 6 | +// Licensed under Apache License v2.0 with Runtime Library Exception  | 
 | 7 | +//  | 
 | 8 | +// See https://swift.org/LICENSE.txt for license information  | 
 | 9 | +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors  | 
 | 10 | +//  | 
 | 11 | +//===----------------------------------------------------------------------===//  | 
 | 12 | + | 
 | 13 | +#if canImport(FoundationEssentials)  | 
 | 14 | +import FoundationEssentials  | 
 | 15 | +#endif  | 
 | 16 | +internal import _FoundationICU  | 
 | 17 | + | 
 private extension String.Encoding {
     /// The name under which ICU is asked to open a converter for this encoding,
     /// or `nil` when this table has no entry for the encoding.
     ///
     /// UTF-* and US-ASCII are deliberately absent: they are supposed to be
     /// converted upstream and should never reach this lookup.
     var _icuConverterName: String? {
         // TODO: Replace this with forthcoming(?) public property such as https://github.com/swiftlang/swift-foundation/pull/1243
         switch self {
         // ISO 8859 family
         case .isoLatin1:
             return "ISO-8859-1"
         case .isoLatin2:
             return "ISO-8859-2"

         // Japanese encodings
         case .japaneseEUC:
             return "EUC-JP"
         case .shiftJIS:
             return "Shift_JIS"
         case .iso2022JP:
             return "ISO-2022-JP"

         // Windows code pages
         case .windowsCP1250:
             return "windows-1250"
         case .windowsCP1251:
             return "windows-1251"
         case .windowsCP1252:
             return "windows-1252"
         case .windowsCP1253:
             return "windows-1253"
         case .windowsCP1254:
             return "windows-1254"

         // Classic Mac OS
         case .macOSRoman:
             return "macintosh"

         default:
             return nil
         }
     }
 }
 | 38 | + | 
 extension ICU {
     /// Owns one ICU converter (`UConverter*`) opened for a single `String.Encoding`.
     ///
     /// The handle is kept inside a `LockedState` and is only reached through
     /// `withLock`; the type is declared `@unchecked Sendable` on that basis.
     final class StringConverter: @unchecked Sendable {
         /// The encoding this converter was opened for.
         let encoding: String.Encoding

         /// The underlying ICU handle (`UConverter*`).
         private let _converter: LockedState<OpaquePointer>

         /// Opens an ICU converter for `encoding`.
         ///
         /// Fails (returns `nil`) when the encoding has no `_icuConverterName`,
         /// or when `ucnv_open` yields no handle or reports a failing status.
         init?(encoding: String.Encoding) {
             guard let name = encoding._icuConverterName else { return nil }

             var openStatus: UErrorCode = U_ZERO_ERROR
             guard let handle = ucnv_open(name, &openStatus), openStatus.isSuccess else { return nil }

             self.encoding = encoding
             self._converter = LockedState(initialState: handle)
         }

         deinit {
             // Hand the handle back to ICU once the last reference goes away.
             _converter.withLock { handle in
                 ucnv_close(handle)
             }
         }
     }
 }
 | 62 | + | 
 extension ICU.StringConverter {
     /// Decodes `data`, interpreted in this converter's encoding, into a `String`.
     ///
     /// - Parameter data: The encoded bytes to convert.
     /// - Returns: Whatever `_withResizingUCharBuffer` produces around
     ///   `ucnv_toUChars`, or `nil` when the byte count (or the initial buffer
     ///   capacity derived from it) cannot be represented as a `CInt`.
     func decode(data: Data) -> String? {
         return _converter.withLock { (converter) -> String? in
             defer {
                 // Converters are reused between calls, so clear the
                 // to-Unicode state before releasing the lock.
                 ucnv_resetToUnicode(converter)
             }

             // ICU takes 32-bit lengths. Fail the conversion instead of trapping
             // when the input is too large to describe.
             guard let srcLength = CInt(exactly: data.count) else { return nil }
             let (scaledLength, scaleOverflowed) = srcLength.multipliedReportingOverflow(
                 by: CInt(ucnv_getMinCharSize(converter))
             )
             let (initCapacity, capacityOverflowed) = scaledLength.addingReportingOverflow(1)
             guard !scaleOverflowed, !capacityOverflowed else { return nil }

             // NOTE(review): `_withResizingUCharBuffer` is defined elsewhere; presumably it
             // retries with a larger buffer when ICU reports an overflow — confirm.
             return _withResizingUCharBuffer(initialSize: initCapacity) { (dest, capacity, status) in
                 return data.withUnsafeBytes { src in
                     ucnv_toUChars(
                         converter,
                         dest,
                         capacity,
                         src.baseAddress,
                         srcLength,
                         &status
                     )
                 }
             }
         }
     }

     /// Encodes `string` into this converter's encoding.
     ///
     /// - Parameters:
     ///   - string: The text to convert.
     ///   - lossy: When `true`, the converter is configured with
     ///     `UCNV_FROM_U_CALLBACK_SUBSTITUTE` and a one-unit substitution string;
     ///     when `false`, it is configured with `UCNV_FROM_U_CALLBACK_STOP`.
     /// - Returns: The encoded bytes, or `nil` when configuring the converter
     ///   fails, when ICU reports a failing status for the conversion, or when
     ///   the input is too large for ICU's 32-bit lengths.
     func encode(string: String, allowLossyConversion lossy: Bool) -> Data?  {
         return _converter.withLock { (converter) -> Data? in
             defer {
                 // Converters are reused between calls, so clear the
                 // from-Unicode state before releasing the lock.
                 ucnv_resetFromUnicode(converter)
             }

             // Configure error handling first: nothing has been allocated yet,
             // so these early exits cannot leak.
             var status: UErrorCode = U_ZERO_ERROR
             if lossy {
                 var lossyChar: UChar = encoding == .ascii ? 0xFF : 0x3F
                 ucnv_setSubstString(
                     converter,
                     &lossyChar,
                     1,
                     &status
                 )
                 guard status.isSuccess else { return nil }

                 ucnv_setFromUCallBack(
                     converter,
                     UCNV_FROM_U_CALLBACK_SUBSTITUTE,
                     nil, // newContext
                     nil, // oldAction
                     nil, // oldContext
                     &status
                 )
                 guard status.isSuccess else { return nil }
             } else {
                 ucnv_setFromUCallBack(
                     converter,
                     UCNV_FROM_U_CALLBACK_STOP,
                     nil, // newContext
                     nil, // oldAction
                     nil, // oldContext
                     &status
                 )
                 guard status.isSuccess else { return nil }
             }

             // Copy the UTF-16 code units into contiguous storage for ICU.
             let utf16Rep = string.utf16
             let uchars = UnsafeMutableBufferPointer<UChar>.allocate(capacity: utf16Rep.count)
             _ = uchars.initialize(fromContentsOf: utf16Rep)
             defer {
                 uchars.deallocate()
             }

             // ICU takes 32-bit lengths. Fail the conversion instead of trapping
             // when the source or destination size cannot be described.
             guard let srcLength = CInt(exactly: uchars.count) else { return nil }
             let (scaledLength, scaleOverflowed) = srcLength.multipliedReportingOverflow(
                 by: CInt(ucnv_getMaxCharSize(converter))
             )
             let (capacity, capacityOverflowed) = scaledLength.addingReportingOverflow(1)
             guard !scaleOverflowed, !capacityOverflowed else { return nil }

             let dest = UnsafeMutableRawPointer.allocate(
                 byteCount: Int(capacity),
                 alignment: MemoryLayout<CChar>.alignment
             )

             let actualLength = ucnv_fromUChars(
                 converter,
                 dest,
                 capacity,
                 uchars.baseAddress,
                 srcLength,
                 &status
             )
             guard status.isSuccess else {
                 // Ownership of `dest` has not been handed to `Data` yet, so it
                 // must be released here or it would leak on every failed conversion.
                 dest.deallocate()
                 return nil
             }

             // From here on `Data` owns `dest` and frees it through the custom deallocator.
             return Data(
                 bytesNoCopy: dest,
                 count: Int(actualLength),
                 deallocator: .custom({ pointer, _ in pointer.deallocate() })
             )
         }
     }
 }
 | 156 | + | 
 extension ICU.StringConverter {
     /// Converters created so far, keyed by the encoding they were opened for.
     private static let _converters = LockedState<[String.Encoding: ICU.StringConverter]>(initialState: [:])

     /// Returns the converter for `encoding`, creating and remembering it on first use.
     ///
     /// - Returns: The stored converter, a newly created one, or `nil` when
     ///   `ICU.StringConverter(encoding:)` fails. Failures are not remembered.
     static func converter(for encoding: String.Encoding) -> ICU.StringConverter? {
         return _converters.withLock { (cache) -> ICU.StringConverter? in
             if let existing = cache[encoding] {
                 return existing
             }
             guard let created = ICU.StringConverter(encoding: encoding) else {
                 return nil
             }
             cache[encoding] = created
             return created
         }
     }
 }
 | 173 | + | 
 | 174 | + | 
 | 175 | +#if !FOUNDATION_FRAMEWORK  | 
 /// Dynamic replacement for `_icuMakeStringFromBytes(_:encoding:)` that decodes
 /// `bytes` through the `ICU.StringConverter` for `encoding`.
 ///
 /// - Returns: The decoded string, or `nil` when no converter is available for
 ///   `encoding`, when `bytes` has no base address, or when decoding fails.
 @_dynamicReplacement(for: _icuMakeStringFromBytes(_:encoding:))
 func _icuMakeStringFromBytes_impl(_ bytes: UnsafeBufferPointer<UInt8>, encoding: String.Encoding) -> String? {
     guard let converter = ICU.StringConverter.converter(for: encoding) else {
         return nil
     }
     // NOTE(review): a buffer without a base address yields `nil` rather than
     // an empty string — confirm that callers handle empty input themselves.
     guard let start = bytes.baseAddress else {
         return nil
     }

     // Wrap the caller's memory in a `Data` without copying it.
     // `Data(bytesNoCopy:count:deallocator:)` only accepts a mutable raw pointer,
     // hence the `mutating:` conversion. The memory is only borrowed: `decode(data:)`
     // reads it and does not keep or modify it, and `.none` leaves ownership
     // with the caller.
     let borrowed = Data(
         bytesNoCopy: UnsafeMutableRawPointer(mutating: start),
         count: bytes.count,
         deallocator: .none
     )
     return converter.decode(data: borrowed)
 }
 | 198 | + | 
 /// Dynamic replacement for `_icuStringEncodingConvert(string:using:allowLossyConversion:)`
 /// that encodes `string` through the `ICU.StringConverter` for `encoding`.
 ///
 /// - Returns: The encoded bytes, or `nil` when no converter is available for
 ///   `encoding` or when encoding fails.
 @_dynamicReplacement(for: _icuStringEncodingConvert(string:using:allowLossyConversion:))
 func _icuStringEncodingConvert_impl(string: String, using encoding: String.Encoding, allowLossyConversion: Bool) -> Data? {
     return ICU.StringConverter
         .converter(for: encoding)?
         .encode(string: string, allowLossyConversion: allowLossyConversion)
 }
 | 206 | +#endif  | 
0 commit comments