Skip to content

Commit 38f7ec2

Browse files
committed
[stdlib] Fix the String.decodeCString for UTF16 and UTF32 (#2681)
[stdlib] Fix the `String.decodeCString` for UTF16 and UTF32. Resolves [SR-1578](https://bugs.swift.org/browse/SR-1578). Essentially, the problem was that `strlen` is not the right way to obtain the length of anything but a null-terminated UTF-8 sequence of characters. Other encodings require alternative mechanisms.
1 parent ed756df commit 38f7ec2

File tree

4 files changed

+88
-6
lines changed

4 files changed

+88
-6
lines changed

stdlib/public/core/CString.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ extension String {
137137
guard let cString = cString else {
138138
return nil
139139
}
140-
let len = Int(_swift_stdlib_strlen(UnsafePointer(cString)))
140+
let len = encoding._nullCodeUnitOffset(in: cString)
141141
let buffer = UnsafeBufferPointer<Encoding.CodeUnit>(
142142
start: cString, count: len)
143143

stdlib/public/core/Unicode.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
import SwiftShims
1314

1415
// Conversions between different Unicode encodings. Note that UTF-16 and
1516
// UTF-32 decoding are *not* currently resilient to erroneous data.
@@ -132,6 +133,12 @@ public protocol UnicodeCodec {
132133
_ input: UnicodeScalar,
133134
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
134135
)
136+
137+
/// Searches for the first occurrence of a `CodeUnit` that is equal to 0.
138+
///
139+
/// Equivalent to `strlen` for C strings.
140+
/// - Complexity: O(n)
141+
static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int
135142
}
136143

137144
/// A codec for translating between Unicode scalar values and UTF-8 code
@@ -428,6 +435,10 @@ public struct UTF8 : UnicodeCodec {
428435
public static func isContinuation(_ byte: CodeUnit) -> Bool {
429436
return byte & 0b11_00__0000 == 0b10_00__0000 // true when the two high bits of `byte` are 0b10
430437
}
438+
439+
/// UTF-8 implementation of `_nullCodeUnitOffset(in:)`.
///
/// Re-types `input` and hands it to `_swift_stdlib_strlen`, converting the
/// result to `Int`. The input is expected to be terminated by a zero code
/// unit, as for any C string.
public static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int {
440+
return Int(_swift_stdlib_strlen(UnsafePointer(input)))
441+
}
431442
}
432443

433444
/// A codec for translating between Unicode scalar values and UTF-16 code
@@ -1149,6 +1160,22 @@ extension UnicodeScalar {
11491160
}
11501161
}
11511162

1163+
/// Default `_nullCodeUnitOffset(in:)` for codecs whose `CodeUnit` is an
/// unsigned integer.
///
/// Walks `input` one code unit at a time and returns the index of the first
/// code unit equal to 0. The loop has no other exit condition, so `input`
/// must be zero-terminated; otherwise it keeps reading past the end of the
/// underlying storage.
extension UnicodeCodec where CodeUnit : UnsignedInteger {
1164+
public static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int {
1165+
var length = 0
1166+
while input[length] != 0 {
1167+
length += 1
1168+
}
1169+
return length
1170+
}
1171+
}
1172+
1173+
/// Unconstrained fallback for `_nullCodeUnitOffset(in:)`.
///
/// Always traps via `fatalError`. NOTE(review): presumably this is only
/// reached by codecs whose `CodeUnit` is not an `UnsignedInteger` and that do
/// not supply their own implementation -- confirm against overload resolution.
extension UnicodeCodec {
1174+
public static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int {
1175+
fatalError("_nullCodeUnitOffset(in:) implementation should be provided")
1176+
}
1177+
}
1178+
11521179
@available(*, unavailable, renamed: "UnicodeCodec")
11531180
public typealias UnicodeCodecType = UnicodeCodec
11541181

validation-test/stdlib/String.swift

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1252,5 +1252,35 @@ StringTests.test("String.append(_: Character)") {
12521252
}
12531253
}
12541254

1255-
runAllTests()
1255+
/// Test helper: builds a zero-terminated buffer of `C.CodeUnit` from `s` and
/// decodes it with `String.decodeCString(_:as:)`.
///
/// Each Unicode scalar of `s` is converted to exactly one code unit via
/// `numericCast`; no real transcoding is performed. NOTE(review): presumably
/// only meaningful for inputs whose scalar values each fit in a single code
/// unit of `C`, such as the ASCII strings used by the tests below.
///
/// - Parameters:
///   - s: The string whose scalar values become the code units.
///   - codec: The codec type used to decode the buffer.
/// - Returns: Whatever `String.decodeCString` returns for the buffer.
internal func decodeCString<
1256+
C : UnicodeCodec
1257+
where
1258+
C.CodeUnit : UnsignedInteger
1259+
>(_ s: String, as codec: C.Type)
1260+
-> (result: String, repairsMade: Bool)? {
1261+
let units = s.unicodeScalars.map({ $0.value }) + [0]
1262+
return units.map({ C.CodeUnit(numericCast($0)) }).withUnsafeBufferPointer {
1263+
String.decodeCString($0.baseAddress, as: C.self)
1264+
}
1265+
}
12561266

1267+
// Decodes zero-terminated UTF-8 code units for "foobar" and expects the same
// string back with no repairs reported.
StringTests.test("String.decodeCString/UTF8") {
1268+
let actual = decodeCString("foobar", as: UTF8.self)
1269+
expectFalse(actual!.repairsMade)
1270+
expectEqual("foobar", actual!.result)
1271+
}
1272+
1273+
// Decodes zero-terminated UTF-16 code units for "foobar" and expects the same
// string back with no repairs reported.
StringTests.test("String.decodeCString/UTF16") {
1274+
let actual = decodeCString("foobar", as: UTF16.self)
1275+
expectFalse(actual!.repairsMade)
1276+
expectEqual("foobar", actual!.result)
1277+
}
1278+
1279+
// Decodes zero-terminated UTF-32 code units for "foobar" and expects the same
// string back with no repairs reported.
StringTests.test("String.decodeCString/UTF32") {
1280+
let actual = decodeCString("foobar", as: UTF32.self)
1281+
expectFalse(actual!.repairsMade)
1282+
expectEqual("foobar", actual!.result)
1283+
}
1284+
1285+
1286+
runAllTests()

validation-test/stdlib/Unicode.swift renamed to validation-test/stdlib/Unicode.swift.gyb

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
// RUN: rm -rf %t
2-
// RUN: mkdir -p %t
3-
// RUN: %target-build-swift %s -o %t/a.out
4-
// RUN: %target-run %t/a.out
1+
// RUN: rm -rf %t && mkdir -p %t && %S/../../utils/gyb %s -o %t/Unicode.swift
2+
// RUN: %S/../../utils/line-directive %t/Unicode.swift -- %target-build-swift %t/Unicode.swift -o %t/a.out -Xfrontend -disable-objc-attr-requires-foundation-module
3+
// RUN: %S/../../utils/line-directive %t/Unicode.swift -- %target-run %t/a.out
54
// REQUIRES: executable_test
65

76
import SwiftPrivate
@@ -2464,5 +2463,31 @@ StringTests.test("StreamableConformance") {
24642463

24652464
#endif // _runtime(_ObjC)
24662465

2466+
// Test table for `_nullCodeUnitOffset(in:)`: each input carries an explicit
// "\0" terminator, and `expected` is the number of code units before it.
let nullOffsetTests = [
2467+
(input: "\0", expected: 0),
2468+
(input: "a\0", expected: 1),
2469+
(input: "foobar\0", expected: 6)
2470+
]
2471+
2472+
// Suite that collects the per-encoding `_nullCodeUnitOffset(in:)` tests.
let NullCodeUnitOffsetTests = TestSuite("NullCodeUnitOffsetTests")
2473+
2474+
// gyb template: emits one test per (Encoding, View) pair. Each test turns
// every `nullOffsetTests` input into an array of that encoding's code units
// through the named string view, calls `_nullCodeUnitOffset(in:)` on the
// array's base address, and compares the result with the expected offset.
% for (Encoding, View) in [
2475+
% ('UTF8', 'utf8'),
2476+
% ('UTF16', 'utf16'),
2477+
% ('UTF32', 'unicodeScalars.map{ $0.value }')
2478+
% ]:
2479+
2480+
NullCodeUnitOffsetTests.test("${Encoding}._nullCodeUnitOffset(in:)") {
2481+
for test in nullOffsetTests {
2482+
let actual = Array(test.input.${View})
2483+
.withUnsafeBufferPointer { p in
2484+
${Encoding}._nullCodeUnitOffset(in: p.baseAddress!)
2485+
}
2486+
expectEqual(test.expected, actual)
2487+
}
2488+
}
2489+
2490+
% end
2491+
24672492
runAllTests()
24682493

0 commit comments

Comments
 (0)