Skip to content

Commit e562133

Browse files
committed
[test] Improve grapheme breaking tests
Instead of checking only the number of breaks in each test case, expose and check the actual positions of those breaks as well.
1 parent 1affdf1 commit e562133

File tree

3 files changed

+69
-80
lines changed

3 files changed

+69
-80
lines changed

stdlib/private/StdlibUnicodeUnittest/GraphemeBreaking.swift

Lines changed: 29 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -15,56 +15,47 @@
1515
#if _runtime(_ObjC)
1616
import Foundation
1717

18-
func parseGraphemeBreakTests(
19-
_ data: String,
20-
into result: inout [(String, Int)]
21-
) {
22-
for line in data.split(separator: "\n") {
18+
public struct GraphemeBreakTest {
19+
public let string: String
20+
public let pieces: [[Unicode.Scalar]]
21+
22+
init?(line: some StringProtocol) {
2323
// Only look at actual tests
24-
guard line.hasPrefix("÷") else {
25-
continue
26-
}
24+
guard line.hasPrefix("÷") else { return nil }
2725

2826
let info = line.split(separator: "#")
2927
let components = info[0].split(separator: " ")
3028

3129
var string = ""
32-
var count = 0
33-
34-
for i in components.indices {
35-
guard i != 0 else {
36-
continue
37-
}
38-
39-
let scalar: Unicode.Scalar
40-
41-
// If we're an odd index, this is a scalar.
42-
if i & 0x1 == 1 {
43-
scalar = Unicode.Scalar(UInt32(components[i], radix: 16)!)!
44-
30+
var pieces: [[Unicode.Scalar]] = []
31+
32+
var piece: [Unicode.Scalar] = []
33+
for component in components {
34+
switch component {
35+
case "":
36+
break
37+
case "×": // no grapheme break opportunity
38+
break
39+
case "÷": // grapheme break opportunity
40+
guard !piece.isEmpty else { break }
41+
pieces.append(piece)
42+
piece = []
43+
case _: // hexadecimal scalar value
44+
guard let value = UInt32(component, radix: 16) else { return nil }
45+
guard let scalar = Unicode.Scalar(value) else { return nil }
4546
string.unicodeScalars.append(scalar)
46-
} else {
47-
// Otherwise, it is a grapheme breaking operator.
48-
49-
// If this is a break, record the +1 count. Otherwise it is × which is
50-
// not a break.
51-
if components[i] == "÷" {
52-
count += 1
53-
}
47+
piece.append(scalar)
5448
}
5549
}
56-
57-
result.append((string, count))
50+
if !piece.isEmpty { pieces.append(piece) }
51+
self.string = string
52+
self.pieces = pieces
5853
}
5954
}
6055

61-
public let graphemeBreakTests: [(String, Int)] = {
62-
var result: [(String, Int)] = []
63-
56+
public let graphemeBreakTests: [GraphemeBreakTest] = {
6457
let testFile = readInputFile("GraphemeBreakTest.txt")
65-
66-
parseGraphemeBreakTests(testFile, into: &result)
67-
68-
return result
58+
return testFile.split(separator: "\n")
59+
.compactMap { GraphemeBreakTest(line: $0) }
6960
}()
7061
#endif

test/stdlib/StringIndex.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -656,8 +656,8 @@ suite.test("Fully exhaustive index interchange")
656656

657657
#if _runtime(_ObjC)
658658
suite.test("Fully exhaustive index interchange/GraphemeBreakTests") {
659-
for string in graphemeBreakTests.map { $0.0 } {
660-
fullyExhaustiveIndexInterchange(string)
659+
for test in graphemeBreakTests {
660+
fullyExhaustiveIndexInterchange(test.string)
661661
}
662662
}
663663
#endif

validation-test/stdlib/StringGraphemeBreaking.swift

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,59 @@ import StdlibUnicodeUnittest
1010
import Foundation
1111

1212
let StringGraphemeBreaking = TestSuite("StringGraphemeBreaking")
13+
defer { runAllTests() }
1314

1415
extension String {
15-
var backwardsCount: Int {
16-
var c = 0
17-
var index = endIndex
18-
while index != startIndex {
19-
c += 1
20-
formIndex(before: &index)
16+
var forwardPieces: [[Unicode.Scalar]] {
17+
var i = startIndex
18+
var r: [[Unicode.Scalar]] = []
19+
while i < endIndex {
20+
let j = self.index(after: i)
21+
r.append(Array(self[i..<j].unicodeScalars))
22+
i = j
2123
}
22-
return c
24+
return r
25+
}
26+
27+
var backwardPieces: [[Unicode.Scalar]] {
28+
var j = endIndex
29+
var r: [[Unicode.Scalar]] = []
30+
while j > startIndex {
31+
let i = self.index(before: j)
32+
r.append(Array(self[i..<j].unicodeScalars))
33+
j = i
34+
}
35+
r.reverse()
36+
return r
2337
}
2438
}
2539

2640
if #available(SwiftStdlib 5.6, *) {
2741
StringGraphemeBreaking.test("grapheme breaking") {
28-
for graphemeBreakTest in graphemeBreakTests {
42+
for test in graphemeBreakTests {
2943
expectEqual(
30-
graphemeBreakTest.1,
31-
graphemeBreakTest.0.count,
32-
"string: \(String(reflecting: graphemeBreakTest.0))")
44+
test.string.forwardPieces, test.pieces,
45+
"string: \(String(reflecting: test.string)) (forward)")
3346
expectEqual(
34-
graphemeBreakTest.1,
35-
graphemeBreakTest.0.backwardsCount,
36-
"string: \(String(reflecting: graphemeBreakTest.0))")
47+
test.string.backwardPieces, test.pieces,
48+
"string: \(String(reflecting: test.string)) (backward)")
3749
}
3850
}
3951
}
4052

4153
// The simplest subclass of NSString that CoreFoundation does not know
4254
// about.
43-
class NonContiguousNSString : NSString {
55+
class NonContiguousNSString: NSString {
4456
required init(coder aDecoder: NSCoder) {
4557
fatalError("don't call this initializer")
4658
}
4759
required init(itemProviderData data: Data, typeIdentifier: String) throws {
48-
fatalError("don't call this initializer")
60+
fatalError("don't call this initializer")
4961
}
5062

51-
override init() {
63+
override init() {
5264
_value = []
53-
super.init()
65+
super.init()
5466
}
5567

5668
init(_ value: [UInt16]) {
@@ -80,33 +92,19 @@ extension _StringGuts {
8092
func _isForeign() -> Bool
8193
}
8294

83-
func getUTF16Array(from string: String) -> [UInt16] {
84-
var result: [UInt16] = []
85-
86-
for cp in string.utf16 {
87-
result.append(cp)
88-
}
89-
90-
return result
91-
}
92-
9395
if #available(SwiftStdlib 5.6, *) {
9496
StringGraphemeBreaking.test("grapheme breaking foreign") {
95-
for graphemeBreakTest in graphemeBreakTests {
96-
let foreignTest = NonContiguousNSString(
97-
getUTF16Array(from: graphemeBreakTest.0)
98-
)
99-
let test = foreignTest as String
97+
for test in graphemeBreakTests {
98+
let foreign = NonContiguousNSString(Array(test.string.utf16))
99+
let string = foreign as String
100100

101-
expectTrue(test._guts._isForeign())
101+
expectTrue(string._guts._isForeign())
102102
expectEqual(
103-
graphemeBreakTest.1, test.count,
104-
"string: \(String(reflecting: graphemeBreakTest.0))")
103+
string.forwardPieces, test.pieces,
104+
"string: \(String(reflecting: test.string)) (forward)")
105105
expectEqual(
106-
graphemeBreakTest.1, test.backwardsCount,
107-
"string: \(String(reflecting: graphemeBreakTest.0))")
106+
string.backwardPieces, test.pieces,
107+
"string: \(String(reflecting: test.string)) (backward)")
108108
}
109109
}
110110
}
111-
112-
runAllTests()

0 commit comments

Comments
 (0)