@@ -33,9 +33,50 @@ if #available(SwiftStdlib 5.8, *) {
33
33
}
34
34
}
35
35
36
/// Decodes `buffer` as UTF-8 and returns the Unicode scalars it yields, in order.
///
/// - Parameter buffer: A sequence of bytes to run through a `UTF8` decoder.
/// - Returns: Every scalar the decoder produced before it reported end of input.
///
/// A decoding error does not stop the loop: it is reported by calling
/// `expectTrue(false, ...)` and decoding resumes with the next `decode` call.
func scalars(in buffer: some Sequence<UInt8>) -> [Unicode.Scalar] {
  var bytes = buffer.makeIterator()
  var decoder = UTF8()
  var decoded: [Unicode.Scalar] = []
  while true {
    switch decoder.decode(&bytes) {
    case .emptyInput:
      // Input exhausted: hand back everything collected so far.
      return decoded
    case .error:
      expectTrue(false, " Invalid scalar ")
    case .scalarValue(let scalar):
      decoded.append(scalar)
    }
  }
}
48
+
36
49
if #available( SwiftStdlib 5 . 8 , * ) {
37
- suite. test ( " Consistency with Swift String's behavior " ) {
38
- let sampleString = #"""
50
// For every entry of `graphemeBreakTests`, cuts the UTF-8 bytes of `test.string`
// at the positions reported by `_firstBreak` and expects the resulting scalar
// groups to equal `test.pieces`.
suite.test(" Unicode test data/_firstBreak ") {
  for test in graphemeBreakTests {
    // A fresh recognizer per test string, so no state carries over between entries.
    var recognizer = Unicode._CharacterRecognizer()
    var actualPieces: [[Unicode.Scalar]] = []
    // `withUTF8` is mutating, hence the mutable copy.
    var text = test.string
    text.withUTF8 { buffer in
      var cursor = buffer.startIndex
      var pieceStart = cursor
      // Stop when the buffer is consumed or the recognizer reports no further break.
      while cursor < buffer.endIndex,
        let breakRange = recognizer._firstBreak(
          inUncheckedUnsafeUTF8Buffer: buffer, startingAt: cursor)
      {
        // Only emit a piece when the reported position is past the pending start,
        // so no empty piece is produced for a break at `pieceStart` itself.
        if pieceStart < breakRange.lowerBound {
          actualPieces.append(scalars(in: buffer[pieceStart..<breakRange.lowerBound]))
        }

        pieceStart = breakRange.lowerBound
        cursor = breakRange.upperBound
      }

      // Whatever remains after the last reported position forms the final piece.
      actualPieces.append(scalars(in: buffer[pieceStart...]))
    }
    expectEqual(actualPieces, test.pieces,
      " string: \(String(reflecting: test.string)) ")
  }
}
77
+ }
78
+
79
+ let sampleString = #"""
39
80
The powerful programming language that is also easy to learn.
40
81
손쉽게 학습할 수 있는 강력한 프로그래밍 언어.
41
82
🪙 A 🥞 short 🍰 piece 🫘 of 🌰 text 👨👨👧👧 with 👨👩👦 some 🚶🏽 emoji 🇺🇸🇨🇦 characters 🧈
@@ -52,6 +93,9 @@ if #available(SwiftStdlib 5.8, *) {
52
93
e̶̢͕̦̜͔̘̘̝͈̪̖̺̥̺̹͉͎͈̫̯̯̻͑͑̿̽͂̀̽͋́̎̈́̈̿͆̿̒̈́̽̔̇͐͛̀̓͆̏̾̀̌̈́̆̽̕ͅ
53
94
"""#
54
95
96
+ if #available( SwiftStdlib 5 . 8 , * ) {
97
+ suite. test ( " Consistency with Swift String's behavior/hasBreak " ) {
98
+
55
99
let expectedBreaks = Array ( sampleString. indices)
56
100
57
101
let u = sampleString. unicodeScalars
0 commit comments