Skip to content

Commit 23ba0ee

Browse files
moiseev authored and tkremenek committed
[stdlib] Fix the String.decodeCString for UTF16 and UTF32 (#2713)
Resolves [SR-1578](https://bugs.swift.org/browse/SR-1578). Essentially, the problem was that `strlen` is not the right way to obtain the length of anything but a null-terminated UTF-8 sequence of characters. Other encodings require alternative mechanisms.
1 parent 6bbf8ba commit 23ba0ee

File tree

4 files changed

+88
-3
lines changed

4 files changed

+88
-3
lines changed

stdlib/public/core/CString.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ extension String {
138138
guard let cString = cString else {
139139
return nil
140140
}
141-
let len = Int(_swift_stdlib_strlen(UnsafePointer(cString)))
141+
let len = encoding._nullCodeUnitOffset(in: cString)
142142
let buffer = UnsafeBufferPointer<Encoding.CodeUnit>(
143143
start: cString, count: len)
144144

stdlib/public/core/Unicode.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
import SwiftShims
1314

1415
// Conversions between different Unicode encodings. Note that UTF-16 and
1516
// UTF-32 decoding are *not* currently resilient to erroneous data.
@@ -132,6 +133,12 @@ public protocol UnicodeCodec {
132133
_ input: UnicodeScalar,
133134
sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void
134135
)
136+
137+
/// Searches for the first occurrence of a `CodeUnit` that is equal to 0.
138+
///
139+
/// Is an equivalent of `strlen` for C-strings.
140+
/// - Complexity: O(n)
141+
static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int
135142
}
136143

137144
/// A codec for translating between Unicode scalar values and UTF-8 code
@@ -430,6 +437,10 @@ public struct UTF8 : UnicodeCodec {
430437
public static func isContinuation(_ byte: CodeUnit) -> Bool {
431438
return byte & 0b11_00__0000 == 0b10_00__0000
432439
}
440+
441+
public static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int {
442+
return Int(_swift_stdlib_strlen(UnsafePointer(input)))
443+
}
433444
}
434445

435446
/// A codec for translating between Unicode scalar values and UTF-16 code
@@ -1159,6 +1170,22 @@ extension UnicodeScalar {
11591170
}
11601171
}
11611172

1173+
extension UnicodeCodec where CodeUnit : UnsignedInteger {
1174+
public static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int {
1175+
var length = 0
1176+
while input[length] != 0 {
1177+
length += 1
1178+
}
1179+
return length
1180+
}
1181+
}
1182+
1183+
extension UnicodeCodec {
1184+
public static func _nullCodeUnitOffset(in input: UnsafePointer<CodeUnit>) -> Int {
1185+
fatalError("_nullCodeUnitOffset(in:) implementation should be provided")
1186+
}
1187+
}
1188+
11621189
@available(*, unavailable, renamed: "UnicodeCodec")
11631190
public typealias UnicodeCodecType = UnicodeCodec
11641191

validation-test/stdlib/String.swift

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1252,5 +1252,35 @@ StringTests.test("String.append(_: Character)") {
12521252
}
12531253
}
12541254

1255-
runAllTests()
1255+
internal func decodeCString<
1256+
C : UnicodeCodec
1257+
where
1258+
C.CodeUnit : UnsignedInteger
1259+
>(_ s: String, as codec: C.Type)
1260+
-> (result: String, repairsMade: Bool)? {
1261+
let units = s.unicodeScalars.map({ $0.value }) + [0]
1262+
return units.map({ C.CodeUnit(numericCast($0)) }).withUnsafeBufferPointer {
1263+
String.decodeCString($0.baseAddress, as: C.self)
1264+
}
1265+
}
12561266

1267+
StringTests.test("String.decodeCString/UTF8") {
1268+
let actual = decodeCString("foobar", as: UTF8.self)
1269+
expectFalse(actual!.repairsMade)
1270+
expectEqual("foobar", actual!.result)
1271+
}
1272+
1273+
StringTests.test("String.decodeCString/UTF16") {
1274+
let actual = decodeCString("foobar", as: UTF16.self)
1275+
expectFalse(actual!.repairsMade)
1276+
expectEqual("foobar", actual!.result)
1277+
}
1278+
1279+
StringTests.test("String.decodeCString/UTF32") {
1280+
let actual = decodeCString("foobar", as: UTF32.self)
1281+
expectFalse(actual!.repairsMade)
1282+
expectEqual("foobar", actual!.result)
1283+
}
1284+
1285+
1286+
runAllTests()

validation-test/stdlib/Unicode.swift renamed to validation-test/stdlib/Unicode.swift.gyb

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
// RUN: %target-run-simple-swift
1+
// RUN: rm -rf %t && mkdir -p %t && %S/../../utils/gyb %s -o %t/Unicode.swift
2+
// RUN: %S/../../utils/line-directive %t/Unicode.swift -- %target-build-swift %t/Unicode.swift -o %t/a.out -Xfrontend -disable-objc-attr-requires-foundation-module
3+
// RUN: %S/../../utils/line-directive %t/Unicode.swift -- %target-run %t/a.out
24
// REQUIRES: executable_test
35

46
// FIXME: rdar://problem/19648117 Needs splitting objc parts out
@@ -2461,5 +2463,31 @@ StringTests.test("StreamableConformance") {
24612463
}
24622464
}
24632465

2466+
let nullOffsetTests = [
2467+
(input: "\0", expected: 0),
2468+
(input: "a\0", expected: 1),
2469+
(input: "foobar\0", expected: 6)
2470+
]
2471+
2472+
let NullCodeUnitOffsetTests = TestSuite("NullCodeUnitOffsetTests")
2473+
2474+
% for (Encoding, View) in [
2475+
% ('UTF8', 'utf8'),
2476+
% ('UTF16', 'utf16'),
2477+
% ('UTF32', 'unicodeScalars.map{ $0.value }')
2478+
% ]:
2479+
2480+
NullCodeUnitOffsetTests.test("${Encoding}._nullCodeUnitOffset(in:)") {
2481+
for test in nullOffsetTests {
2482+
let actual = Array(test.input.${View})
2483+
.withUnsafeBufferPointer { p in
2484+
${Encoding}._nullCodeUnitOffset(in: p.baseAddress!)
2485+
}
2486+
expectEqual(test.expected, actual)
2487+
}
2488+
}
2489+
2490+
% end
2491+
24642492
runAllTests()
24652493

0 commit comments

Comments
 (0)