Skip to content

Commit 7433033

Browse files
committed
feat(strutil): add SplitSkipEmpty function to split strings and filter out empty parts
This commit introduces a new function `SplitSkipEmpty` in the `strutil` package. The function splits a string by a given separator and returns a slice of non-empty substrings. It trims whitespace from each part and excludes any empty or whitespace-only strings. Comprehensive unit tests and benchmarks are included to ensure correctness and performance.
1 parent 10235e9 commit 7433033

File tree

2 files changed

+348
-0
lines changed

2 files changed

+348
-0
lines changed

strutil/string.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,22 @@ func IsEmpty(s string) bool {
2222
func IsNotEmpty(s string) bool {
	// The empty string is rejected up front; otherwise s qualifies only when
	// something is left after strings.TrimSpace removes surrounding whitespace.
	if s == "" {
		return false
	}
	trimmed := strings.TrimSpace(s)
	return len(trimmed) != 0
}
25+
26+
// SplitSkipEmpty splits s around each occurrence of sep and returns the
// trimmed, non-empty pieces in their original order.
//
// Every piece has its leading and trailing whitespace removed with
// strings.TrimSpace; pieces that are empty after trimming are dropped.
// Splitting follows strings.Split, so an empty sep splits s after each
// UTF-8 sequence.
//
// It returns nil when s is empty. When s is non-empty but every piece is
// filtered out, it returns an empty, non-nil slice.
func SplitSkipEmpty(s, sep string) []string {
	if s == "" {
		return nil
	}
	parts := strings.Split(s, sep)
	// At most len(parts) pieces survive, so one allocation is enough.
	result := make([]string, 0, len(parts))
	for _, part := range parts {
		// The piece is already trimmed here, so comparing it against "" is
		// sufficient; a second trimming pass would be redundant work.
		if p := strings.TrimSpace(part); p != "" {
			result = append(result, p)
		}
	}
	return result
}

strutil/string_test.go

Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,3 +787,332 @@ func BenchmarkIsNotEmpty_NonEmptyString(b *testing.B) {
787787
_ = strutil.IsNotEmpty("hello")
788788
}
789789
}
790+
791+
// TestSplitSkipEmpty 测试 SplitSkipEmpty 函数
792+
func TestSplitSkipEmpty(t *testing.T) {
793+
tests := []struct {
794+
name string
795+
s string
796+
sep string
797+
expected []string
798+
}{
799+
{
800+
name: "Empty string",
801+
s: "",
802+
sep: ",",
803+
expected: nil,
804+
},
805+
{
806+
name: "Simple split",
807+
s: "a,b,c",
808+
sep: ",",
809+
expected: []string{"a", "b", "c"},
810+
},
811+
{
812+
name: "Split with empty strings in middle",
813+
s: "a,,b,,c",
814+
sep: ",",
815+
expected: []string{"a", "b", "c"},
816+
},
817+
{
818+
name: "Split with empty string at start",
819+
s: ",a,b,c",
820+
sep: ",",
821+
expected: []string{"a", "b", "c"},
822+
},
823+
{
824+
name: "Split with empty string at end",
825+
s: "a,b,c,",
826+
sep: ",",
827+
expected: []string{"a", "b", "c"},
828+
},
829+
{
830+
name: "Split with empty strings at both ends",
831+
s: ",a,b,c,",
832+
sep: ",",
833+
expected: []string{"a", "b", "c"},
834+
},
835+
{
836+
name: "Only empty strings",
837+
s: ",,,",
838+
sep: ",",
839+
expected: []string{},
840+
},
841+
{
842+
name: "Single character separator",
843+
s: "a|b|c",
844+
sep: "|",
845+
expected: []string{"a", "b", "c"},
846+
},
847+
{
848+
name: "Multi-character separator",
849+
s: "a||b||c",
850+
sep: "||",
851+
expected: []string{"a", "b", "c"},
852+
},
853+
{
854+
name: "Space separator",
855+
s: "a b c",
856+
sep: " ",
857+
expected: []string{"a", "b", "c"},
858+
},
859+
{
860+
name: "Space separator with multiple spaces",
861+
s: "a b c",
862+
sep: " ",
863+
expected: []string{"a", "b", "c"},
864+
},
865+
{
866+
name: "Tab separator",
867+
s: "a\tb\tc",
868+
sep: "\t",
869+
expected: []string{"a", "b", "c"},
870+
},
871+
{
872+
name: "Newline separator",
873+
s: "a\nb\nc",
874+
sep: "\n",
875+
expected: []string{"a", "b", "c"},
876+
},
877+
{
878+
name: "Single element",
879+
s: "a",
880+
sep: ",",
881+
expected: []string{"a"},
882+
},
883+
{
884+
name: "No separator in string",
885+
s: "abc",
886+
sep: ",",
887+
expected: []string{"abc"},
888+
},
889+
{
890+
name: "Separator not found",
891+
s: "a,b,c",
892+
sep: "|",
893+
expected: []string{"a,b,c"},
894+
},
895+
{
896+
name: "Empty separator",
897+
s: "abc",
898+
sep: "",
899+
expected: []string{"a", "b", "c"},
900+
},
901+
{
902+
name: "String with numbers",
903+
s: "1,2,3,4,5",
904+
sep: ",",
905+
expected: []string{"1", "2", "3", "4", "5"},
906+
},
907+
{
908+
name: "String with special characters",
909+
s: "a!b!c",
910+
sep: "!",
911+
expected: []string{"a", "b", "c"},
912+
},
913+
{
914+
name: "String with unicode characters",
915+
s: "你好,世界,测试",
916+
sep: ",",
917+
expected: []string{"你好", "世界", "测试"},
918+
},
919+
{
920+
name: "String with mixed content and empty strings",
921+
s: "a,,b, ,c,,",
922+
sep: ",",
923+
expected: []string{"a", "b", "c"},
924+
},
925+
{
926+
name: "Long string",
927+
s: "a,b,c,d,e,f,g,h,i,j",
928+
sep: ",",
929+
expected: []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"},
930+
},
931+
{
932+
name: "String with only separator",
933+
s: ",",
934+
sep: ",",
935+
expected: []string{},
936+
},
937+
{
938+
name: "String with multiple consecutive separators",
939+
s: "a,,,b,,,c",
940+
sep: ",",
941+
expected: []string{"a", "b", "c"},
942+
},
943+
{
944+
name: "String with leading and trailing spaces in elements",
945+
s: " a , b , c ",
946+
sep: ",",
947+
expected: []string{"a", "b", "c"},
948+
},
949+
{
950+
name: "String with spaces around separators",
951+
s: "a , b , c",
952+
sep: ",",
953+
expected: []string{"a", "b", "c"},
954+
},
955+
{
956+
name: "String with tabs and spaces",
957+
s: "a\t,\tb\t,\tc",
958+
sep: ",",
959+
expected: []string{"a", "b", "c"},
960+
},
961+
{
962+
name: "String with only whitespace segments",
963+
s: " , , ",
964+
sep: ",",
965+
expected: []string{},
966+
},
967+
}
968+
969+
for _, tt := range tests {
970+
t.Run(tt.name, func(t *testing.T) {
971+
result := strutil.SplitSkipEmpty(tt.s, tt.sep)
972+
if !equalStringSlice(result, tt.expected) {
973+
t.Errorf("SplitSkipEmpty(%q, %q) = %v, want %v", tt.s, tt.sep, result, tt.expected)
974+
}
975+
})
976+
}
977+
}
978+
979+
// equalStringSlice reports whether a and b hold the same strings in the same
// order. A nil slice matches only another nil slice: nil and an empty non-nil
// slice are treated as different.
func equalStringSlice(a, b []string) bool {
	switch {
	case len(a) != len(b):
		return false
	case (a == nil) != (b == nil):
		// Exactly one side is nil.
		return false
	}
	for idx, want := range b {
		if a[idx] != want {
			return false
		}
	}
	return true
}
997+
998+
// TestSplitSkipEmpty_EdgeCases 测试 SplitSkipEmpty 函数的边界情况
999+
func TestSplitSkipEmpty_EdgeCases(t *testing.T) {
1000+
tests := []struct {
1001+
name string
1002+
s string
1003+
sep string
1004+
validate func([]string) bool
1005+
}{
1006+
{
1007+
name: "Very long string",
1008+
s: strings.Repeat("a,", 1000) + "b",
1009+
sep: ",",
1010+
validate: func(result []string) bool {
1011+
return len(result) == 1001 && result[0] == "a" && result[1000] == "b"
1012+
},
1013+
},
1014+
{
1015+
name: "String with whitespace-only segments",
1016+
s: "a, ,b, ,c",
1017+
sep: ",",
1018+
validate: func(result []string) bool {
1019+
// Whitespace-only strings are trimmed and filtered out
1020+
return len(result) == 3 && result[0] == "a" && result[1] == "b" && result[2] == "c"
1021+
},
1022+
},
1023+
{
1024+
name: "String with tab and newline",
1025+
s: "a\tb\nc",
1026+
sep: "\t",
1027+
validate: func(result []string) bool {
1028+
return len(result) == 2 && result[0] == "a" && result[1] == "b\nc"
1029+
},
1030+
},
1031+
{
1032+
name: "Unicode separator",
1033+
s: "a你好b你好c",
1034+
sep: "你好",
1035+
validate: func(result []string) bool {
1036+
return len(result) == 3 && result[0] == "a" && result[1] == "b" && result[2] == "c"
1037+
},
1038+
},
1039+
}
1040+
1041+
for _, tt := range tests {
1042+
t.Run(tt.name, func(t *testing.T) {
1043+
result := strutil.SplitSkipEmpty(tt.s, tt.sep)
1044+
if !tt.validate(result) {
1045+
t.Errorf("SplitSkipEmpty(%q, %q) = %v, validation failed", tt.s, tt.sep, result)
1046+
}
1047+
})
1048+
}
1049+
}
1050+
1051+
// TestSplitSkipEmpty_Consistency 测试 SplitSkipEmpty 的一致性
1052+
func TestSplitSkipEmpty_Consistency(t *testing.T) {
1053+
testCases := []struct {
1054+
s string
1055+
sep string
1056+
}{
1057+
{"a,b,c", ","},
1058+
{"a,,b,,c", ","},
1059+
{"", ","},
1060+
{"a", ","},
1061+
{"a,b", ","},
1062+
}
1063+
1064+
for _, tc := range testCases {
1065+
t.Run(tc.s, func(t *testing.T) {
1066+
// 多次调用应该返回相同结果
1067+
results := make([][]string, 10)
1068+
for i := 0; i < 10; i++ {
1069+
results[i] = strutil.SplitSkipEmpty(tc.s, tc.sep)
1070+
}
1071+
1072+
first := results[0]
1073+
for i, result := range results {
1074+
if !equalStringSlice(result, first) {
1075+
t.Errorf("SplitSkipEmpty(%q, %q) returned inconsistent value: result[0] = %v, result[%d] = %v", tc.s, tc.sep, first, i, result)
1076+
}
1077+
}
1078+
})
1079+
}
1080+
}
1081+
1082+
// BenchmarkSplitSkipEmpty 基准测试 SplitSkipEmpty 函数
1083+
func BenchmarkSplitSkipEmpty(b *testing.B) {
1084+
testString := "a,b,c,d,e,f,g,h,i,j"
1085+
sep := ","
1086+
b.ResetTimer()
1087+
for i := 0; i < b.N; i++ {
1088+
_ = strutil.SplitSkipEmpty(testString, sep)
1089+
}
1090+
}
1091+
1092+
// BenchmarkSplitSkipEmpty_WithEmptyStrings 基准测试 SplitSkipEmpty 函数(包含空字符串)
1093+
func BenchmarkSplitSkipEmpty_WithEmptyStrings(b *testing.B) {
1094+
testString := "a,,b,,c,,d,,e"
1095+
sep := ","
1096+
b.ResetTimer()
1097+
for i := 0; i < b.N; i++ {
1098+
_ = strutil.SplitSkipEmpty(testString, sep)
1099+
}
1100+
}
1101+
1102+
// BenchmarkSplitSkipEmpty_LongString 基准测试 SplitSkipEmpty 函数(长字符串)
1103+
func BenchmarkSplitSkipEmpty_LongString(b *testing.B) {
1104+
testString := strings.Repeat("a,", 1000) + "b"
1105+
sep := ","
1106+
b.ResetTimer()
1107+
for i := 0; i < b.N; i++ {
1108+
_ = strutil.SplitSkipEmpty(testString, sep)
1109+
}
1110+
}
1111+
1112+
// BenchmarkSplitSkipEmpty_EmptyString 基准测试 SplitSkipEmpty 函数(空字符串)
1113+
func BenchmarkSplitSkipEmpty_EmptyString(b *testing.B) {
1114+
b.ResetTimer()
1115+
for i := 0; i < b.N; i++ {
1116+
_ = strutil.SplitSkipEmpty("", ",")
1117+
}
1118+
}

0 commit comments

Comments
 (0)