Skip to content

Commit 7121600

Browse files
committed
[test] Move useful helpers into StdlibUnicodeUnittest
1 parent 3f2550f commit 7121600

File tree

2 files changed

+162
-146
lines changed

2 files changed

+162
-146
lines changed

stdlib/private/StdlibUnicodeUnittest/StdlibUnicodeUnittest.swift

Lines changed: 139 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -779,3 +779,142 @@ public let utf16Tests = [
779779
],
780780
]
781781

782+
extension String {
  /// Renders `value` in uppercase hexadecimal, left-padded with zeros to at
  /// least `width` digits.
  ///
  /// Values that already need `width` digits or more are returned unpadded.
  private static func _zeroPaddedHex<T: BinaryInteger>(
    _ value: T,
    width: Int
  ) -> String {
    let digits = String(value, radix: 16, uppercase: true)
    // Clamp at zero so that a value wider than `width` never produces a
    // negative repeat count.
    let padding = String(
      repeating: "0",
      count: Swift.max(0, width - digits.count))
    return padding + digits
  }

  /// Print out a full list of indices in every view of this string.
  /// This is useful while debugging string indexing issues.
  ///
  /// The output lists, in order, the characters, the Unicode scalars (as
  /// `U+XXXX` followed by the scalar's name, or its debug description when it
  /// has no name), the UTF-8 code units (two hex digits each) and the UTF-16
  /// code units (four hex digits each), each prefixed by its index.
  public func dumpIndices() {
    print("-------------------------------------------------------------------")
    print("String: \(String(reflecting: self))")

    print("Characters:")
    for i in indices {
      print(" \(i) -> \(String(reflecting: self[i]))")
    }

    print("Scalars:")
    for i in unicodeScalars.indices {
      let scalar = unicodeScalars[i]
      let code = String._zeroPaddedHex(scalar.value, width: 4)
      let name = scalar.properties.name ?? "\(scalar.debugDescription)"
      print(" \(i) -> U+\(code) \(name)")
    }

    print("UTF-8:")
    for i in utf8.indices {
      print(" \(i) -> \(String._zeroPaddedHex(utf8[i], width: 2))")
    }

    print("UTF-16:")
    for i in utf16.indices {
      print(" \(i) -> \(String._zeroPaddedHex(utf16[i], width: 4))")
    }
  }

  /// Returns a list of every valid index in every string view, optionally
  /// including end indices.
  ///
  /// The views are visited in the order: characters, Unicode scalars, UTF-8,
  /// UTF-16. We keep equal indices originating from different views because
  /// they may have different grapheme size caches or flags etc.
  ///
  /// - Parameter includingEnd: Whether to append each view's `endIndex` after
  ///   that view's regular indices. Defaults to `true`.
  /// - Returns: The concatenated indices of all four views.
  public func allIndices(includingEnd: Bool = true) -> [String.Index] {
    var result: [String.Index] = []

    // Appends the indices of one view, followed by its end index on request.
    func appendIndices<View: Collection>(of view: View)
    where View.Index == String.Index {
      result.append(contentsOf: view.indices)
      if includingEnd { result.append(view.endIndex) }
    }

    appendIndices(of: self)
    appendIndices(of: unicodeScalars)
    appendIndices(of: utf8)
    appendIndices(of: utf16)
    return result
  }
}
832+
833+
extension Substring {
  /// Gathers every valid index from every view of this substring into a
  /// single array, visiting the views in the order: characters, Unicode
  /// scalars, UTF-8, UTF-16.
  ///
  /// Indices that compare equal but come from different views are all kept,
  /// because they may have different grapheme size caches or flags etc.
  ///
  /// - Parameter includingEnd: When `true` (the default), each view's
  ///   `endIndex` is appended right after that view's regular indices.
  /// - Returns: The concatenated indices of all four views.
  public func allIndices(includingEnd: Bool = true) -> [String.Index] {
    var collected: [String.Index] = []

    // Adds one view's indices, plus its end index when requested.
    func gather<View: Collection>(from view: View)
    where View.Index == String.Index {
      collected.append(contentsOf: view.indices)
      if includingEnd { collected.append(view.endIndex) }
    }

    gather(from: self)
    gather(from: unicodeScalars)
    gather(from: utf8)
    gather(from: utf16)
    return collected
  }
}
849+
850+
extension Collection {
  /// Walks `other` and, for each of its indices, hands `body` the run of
  /// elements in `self` that lies between that index and the next index of
  /// `other`.
  ///
  /// Both collections are assumed to share a single index space. `other` must
  /// start with an item that is less than or equal to the first item in
  /// `self`. If `other` is empty, `self` is expected to be empty as well and
  /// `body` is never called.
  ///
  /// - Parameters:
  ///   - other: The collection whose indices delimit the groups.
  ///   - body: Called once per index of `other` with that index, the matching
  ///     slice of `self`, and the zero-based position of the index in `other`.
  func forEachIndexGroup<G: Collection>(
    by other: G,
    body: (G.Index, Self.SubSequence, Int) throws -> Void
  ) rethrows
  where G.Index == Self.Index
  {
    guard !other.isEmpty else {
      assert(isEmpty)
      return
    }

    // Position in `self` where the next group begins.
    var cursor = startIndex
    for (position, boundary) in other.indices.enumerated() {
      let nextBoundary = other.index(after: boundary)
      let groupStart = cursor
      // Advance up to the next boundary of `other`, never past our own end.
      while cursor < nextBoundary && cursor < endIndex {
        formIndex(after: &cursor)
      }
      try body(boundary, self[groupStart ..< cursor], position)
    }
  }
}
883+
884+
extension String {
  /// Builds a lookup table from each valid index of this string to the index
  /// that addresses the nearest scalar boundary, rounding down.
  ///
  /// The table is filled from the UTF-8 view, then the UTF-16 view, then the
  /// character view; `endIndex` maps to itself.
  ///
  /// - Returns: A dictionary whose values pair the scalar-aligned index with
  ///   the zero-based position of that scalar in `unicodeScalars`. The entry
  ///   for `endIndex` carries the total scalar count as its offset.
  public func scalarMap() -> [Index: (index: Index, offset: Int)] {
    var table: [Index: (index: Index, offset: Int)] = [:]
    let scalars = unicodeScalars

    utf8.forEachIndexGroup(by: scalars) { boundary, group, position in
      for member in group.indices {
        table[member] = (index: boundary, offset: position)
      }
    }
    utf16.forEachIndexGroup(by: scalars) { boundary, group, position in
      for member in group.indices {
        table[member] = (index: boundary, offset: position)
      }
    }
    forEachIndexGroup(by: scalars) { boundary, group, position in
      for member in group.indices {
        table[member] = (index: boundary, offset: position)
      }
    }

    table[endIndex] = (index: endIndex, offset: scalars.count)
    return table
  }

  /// Builds a lookup table from each valid index of this string to the index
  /// that addresses the nearest character boundary, rounding down.
  ///
  /// The table is filled from the UTF-8 view, then the UTF-16 view, then the
  /// Unicode scalar view; `endIndex` maps to itself.
  ///
  /// - Returns: A dictionary whose values pair the character-aligned index
  ///   with the zero-based position of that character in the string. The
  ///   entry for `endIndex` carries the character count as its offset.
  public func characterMap() -> [Index: (index: Index, offset: Int)] {
    var table: [Index: (index: Index, offset: Int)] = [:]

    utf8.forEachIndexGroup(by: self) { boundary, group, position in
      for member in group.indices {
        table[member] = (index: boundary, offset: position)
      }
    }
    utf16.forEachIndexGroup(by: self) { boundary, group, position in
      for member in group.indices {
        table[member] = (index: boundary, offset: position)
      }
    }
    unicodeScalars.forEachIndexGroup(by: self) { boundary, group, position in
      for member in group.indices {
        table[member] = (index: boundary, offset: position)
      }
    }

    table[endIndex] = (index: endIndex, offset: count)
    return table
  }
}
920+

test/stdlib/StringIndex.swift

Lines changed: 23 additions & 146 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
1-
// RUN: %target-run-simple-swift
1+
// RUN: %target-run-stdlib-swift %S/Inputs/
22
// REQUIRES: executable_test
33
// UNSUPPORTED: freestanding
44

55
import StdlibUnittest
66
#if _runtime(_ObjC)
77
import Foundation
88
#endif
9+
import StdlibUnicodeUnittest
910

1011
var suite = TestSuite("StringIndexTests")
1112
defer { runAllTests() }
@@ -18,40 +19,6 @@ enum SimpleString: String {
1819
case emoji = "😀😃🤢🤮👩🏿‍🎤🧛🏻‍♂️🧛🏻‍♂️👩‍👩‍👦‍👦"
1920
}
2021

21-
/// Print out a full list of indices in every view of `string`.
22-
/// This is useful while debugging test failures in this test.
23-
func dumpIndices(_ string: String) {
24-
print("-------------------------------------------------------------------")
25-
print("String: \(String(reflecting: string))")
26-
print("Characters:")
27-
string.indices.forEach { i in
28-
let char = string[i]
29-
print(" \(i) -> \(String(reflecting: char))")
30-
}
31-
print("Scalars:")
32-
string.unicodeScalars.indices.forEach { i in
33-
let scalar = string.unicodeScalars[i]
34-
let value = String(scalar.value, radix: 16, uppercase: true)
35-
let padding = String(repeating: "0", count: max(0, 4 - value.count))
36-
let name = scalar.properties.name ?? "\(scalar.debugDescription)"
37-
print(" \(i) -> U+\(padding)\(value) \(name)")
38-
}
39-
print("UTF-8:")
40-
string.utf8.indices.forEach { i in
41-
let code = string.utf8[i]
42-
let value = String(code, radix: 16, uppercase: true)
43-
let padding = value.count < 2 ? "0" : ""
44-
print(" \(i) -> \(padding)\(value)")
45-
}
46-
print("UTF-16:")
47-
string.utf16.indices.forEach { i in
48-
let code = string.utf16[i]
49-
let value = String(code, radix: 16, uppercase: true)
50-
let padding = String(repeating: "0", count: 4 - value.count)
51-
print(" \(i) -> \(padding)\(value)")
52-
}
53-
}
54-
5522
let simpleStrings: [String] = [
5623
SimpleString.smallASCII.rawValue,
5724
SimpleString.smallUnicode.rawValue,
@@ -352,7 +319,7 @@ suite.test("Exhaustive Index Interchange")
352319
return
353320
}
354321

355-
//dumpIndices(str)
322+
//str.dumpIndices()
356323

357324
var curCharIdx = str.startIndex
358325
var curScalarIdx = str.startIndex
@@ -482,111 +449,7 @@ suite.test("Exhaustive Index Interchange")
482449
}
483450
#endif
484451

485-
extension Collection {
486-
// Assuming both `self` and `other` use the same index space, call `body` for
487-
// each index `i` in `other`, along with the slice in `self` that begins at
488-
// `i` and ends at the index following it in `other`.
489-
//
490-
// `other` must start with an item that is less than or equal to the first
491-
// item in `self`.
492-
func forEachIndexGroup<G: Collection>(
493-
by other: G,
494-
body: (G.Index, Self.SubSequence, Int) throws -> Void
495-
) rethrows
496-
where G.Index == Self.Index
497-
{
498-
if other.isEmpty {
499-
assert(self.isEmpty)
500-
return
501-
}
502-
var i = other.startIndex
503-
var j = self.startIndex
504-
var offset = 0
505-
while i != other.endIndex {
506-
let current = i
507-
other.formIndex(after: &i)
508-
let start = j
509-
while j < i, j < self.endIndex {
510-
self.formIndex(after: &j)
511-
}
512-
let end = j
513-
try body(current, self[start ..< end], offset)
514-
offset += 1
515-
}
516-
}
517-
}
518-
519-
extension String {
520-
// Returns a list of every valid index in every string view, optionally
521-
// including end indices. We keep equal indices originating from different
522-
// views because they may have different grapheme size caches or flags etc.
523-
func allIndices(includingEnd: Bool = true) -> [String.Index] {
524-
var r = Array(self.indices)
525-
if includingEnd { r.append(self.endIndex) }
526-
r += Array(self.unicodeScalars.indices)
527-
if includingEnd { r.append(self.unicodeScalars.endIndex) }
528-
r += Array(self.utf8.indices)
529-
if includingEnd { r.append(self.utf8.endIndex) }
530-
r += Array(self.utf16.indices)
531-
if includingEnd { r.append(self.utf16.endIndex) }
532-
return r
533-
}
534-
535-
/// Returns a dictionary mapping each valid index to the index that lies on
536-
/// the nearest scalar boundary, rounding down.
537-
func scalarMap() -> [String.Index: (index: String.Index, offset: Int)] {
538-
var map: [String.Index: (index: String.Index, offset: Int)] = [:]
539-
540-
self.utf8.forEachIndexGroup(by: self.unicodeScalars) { scalar, slice, offset in
541-
for i in slice.indices { map[i] = (scalar, offset) }
542-
}
543-
self.utf16.forEachIndexGroup(by: self.unicodeScalars) { scalar, slice, offset in
544-
for i in slice.indices { map[i] = (scalar, offset) }
545-
}
546-
self.forEachIndexGroup(by: self.unicodeScalars) { scalar, slice, offset in
547-
for i in slice.indices { map[i] = (scalar, offset) }
548-
}
549-
map[endIndex] = (endIndex, self.unicodeScalars.count)
550-
return map
551-
}
552-
553-
/// Returns a dictionary mapping each valid index to the index that lies on
554-
/// the nearest character boundary, rounding down.
555-
func characterMap() -> [String.Index: (index: String.Index, offset: Int)] {
556-
var map: [String.Index: (index: String.Index, offset: Int)] = [:]
557-
self.utf8.forEachIndexGroup(by: self) { char, slice, offset in
558-
for i in slice.indices { map[i] = (char, offset) }
559-
}
560-
self.utf16.forEachIndexGroup(by: self) { char, slice, offset in
561-
for i in slice.indices { map[i] = (char, offset) }
562-
}
563-
self.unicodeScalars.forEachIndexGroup(by: self) { char, slice, offset in
564-
for i in slice.indices { map[i] = (char, offset) }
565-
}
566-
map[endIndex] = (endIndex, count)
567-
return map
568-
}
569-
}
570-
571-
extension Substring {
572-
// Returns a list of every valid index in every string view, optionally
573-
// including end indices. We keep equal indices originating from different
574-
// views because they may have different grapheme size caches or flags etc.
575-
func allIndices(includingEnd: Bool = true) -> [String.Index] {
576-
var r = Array(self.indices)
577-
if includingEnd { r.append(self.endIndex) }
578-
r += Array(self.unicodeScalars.indices)
579-
if includingEnd { r.append(self.unicodeScalars.endIndex) }
580-
r += Array(self.utf8.indices)
581-
if includingEnd { r.append(self.utf8.endIndex) }
582-
r += Array(self.utf16.indices)
583-
if includingEnd { r.append(self.utf16.endIndex) }
584-
return r
585-
}
586-
}
587-
588-
suite.test("Fully exhaustive index interchange")
589-
.forEach(in: examples) { string in
452+
func fullyExhaustiveIndexInterchange(_ string: String) {
590453
guard #available(SwiftStdlib 5.7, *) else {
591454
// Index navigation in 5.7 always rounds input indices down to the nearest
592455
// Character, so that we always have a well-defined distance between
@@ -596,7 +459,7 @@ suite.test("Fully exhaustive index interchange")
596459
return
597460
}
598461

599-
//dumpIndices(string)
462+
//string.dumpIndices()
600463

601464
let scalarMap = string.scalarMap()
602465
let characterMap = string.characterMap()
@@ -736,6 +599,18 @@ suite.test("Fully exhaustive index interchange")
736599
}
737600
}
738601

602+
// Runs the exhaustive index interchange checks over the shared `examples`.
suite.test("Fully exhaustive index interchange")
.forEach(in: examples) { string in
  fullyExhaustiveIndexInterchange(string)
}

// Runs the same checks over the first element of every `graphemeBreakTests`
// entry.
suite.test("Fully exhaustive index interchange/GraphemeBreakTests") {
  // The closure is passed in parentheses: a trailing closure in a `for`-`in`
  // header is confusable with the loop body.
  for string in graphemeBreakTests.map({ $0.0 }) {
    fullyExhaustiveIndexInterchange(string)
  }
}
612+
613+
739614
suite.test("Global vs local grapheme cluster boundaries") {
740615
guard #available(SwiftStdlib 5.7, *) else {
741616
// Index navigation in 5.7 always rounds input indices down to the nearest
@@ -864,14 +739,14 @@ suite.test("Index encoding correction") {
864739
// If the mutation's effect included the data addressed by the original index,
865740
// then we may still get nonsensical results.
866741
var s = ("🫱🏼‍🫲🏽 a 🧑🏽‍🌾 b" as NSString) as String
867-
//dumpIndices(s)
742+
//s.dumpIndices()
868743

869744
let originals = s.allIndices(includingEnd: false).map {
870745
($0, s[$0], s.unicodeScalars[$0], s.utf8[$0], s.utf16[$0])
871746
}
872747

873748
s.append(".")
874-
//dumpIndices(s)
749+
//s.dumpIndices()
875750

876751
for (i, char, scalar, u8, u16) in originals {
877752
expectEqual(s[i], char, "i: \(i)")
@@ -893,7 +768,7 @@ suite.test("String.replaceSubrange index validation")
893768
return
894769
}
895770

896-
//dumpIndices(string)
771+
//string.dumpIndices()
897772

898773
let scalarMap = string.scalarMap()
899774
let allIndices = string.allIndices()
@@ -958,12 +833,13 @@ suite.test("Substring.replaceSubrange index validation")
958833
return
959834
}
960835

961-
dumpIndices(string)
836+
string.dumpIndices()
962837

963838
let scalarMap = string.scalarMap()
964839
let allIndices = string.allIndices()
965840

966841
for i in allIndices {
842+
print(i)
967843
for j in allIndices {
968844
guard i <= j else { continue }
969845
let si = scalarMap[i]!.index
@@ -1021,3 +897,4 @@ suite.test("Substring.replaceSubrange index validation")
1021897
}
1022898
}
1023899
}
900+

0 commit comments

Comments
 (0)