Skip to content

Commit cdb3c82

Browse files
committed
query: Optimize away allocations in string matching
Improves performance by ~4x or more.

```
$ benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/parca-dev/parca/pkg/query
cpu: Apple M1 Max
                                                │   old.txt    │               new.txt               │
                                                │    sec/op    │    sec/op     vs base               │
StringMatching/equalFoldBytes-10                  58.745n ± 1%    2.923n ±  7%  -95.02% (p=0.000 n=10)
StringMatching/containsFoldBytes-10                69.07n ± 7%    18.68n ±  3%  -72.95% (p=0.000 n=10)
StringMatching/hasPrefixFoldBytes-10               62.72n ± 2%    10.06n ± 13%  -83.96% (p=0.000 n=10)
StringMatchingLongStrings/containsFoldBytes-10    203.30n ± 2%    55.16n ±  2%  -72.87% (p=0.000 n=10)
geomean                                            84.81n         13.19n        -84.44%
```
1 parent 6a48b1a commit cdb3c82

File tree

2 files changed

+73
-15
lines changed

2 files changed

+73
-15
lines changed

pkg/query/columnquery.go

Lines changed: 73 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -982,33 +982,92 @@ func matchesFrameFilter(r *profile.RecordReader, locationIndex, lineIndex int, f
982982
return true
983983
}
984984

985+
// toLower maps an ASCII uppercase letter ('A'..'Z') to the matching lowercase
// letter and returns every other byte value as-is. Bytes outside the ASCII
// letter range (digits, punctuation, bytes >= 0x80) are never modified. It
// works on a single byte value and performs no allocation.
func toLower(b byte) byte {
	// Distance between an uppercase ASCII letter and its lowercase form.
	const caseOffset = 'a' - 'A'

	if b < 'A' || b > 'Z' {
		return b
	}
	return b + caseOffset
}
993+
994+
// equalFoldBytes performs case-insensitive comparison without allocation.
995+
func equalFoldBytes(s, t []byte) bool {
996+
if len(s) != len(t) {
997+
return false
998+
}
999+
for i := 0; i < len(s); i++ {
1000+
if toLower(s[i]) != toLower(t[i]) {
1001+
return false
1002+
}
1003+
}
1004+
return true
1005+
}
1006+
1007+
// containsFoldBytes performs case-insensitive contains check without allocation.
1008+
func containsFoldBytes(s, substr []byte) bool {
1009+
if len(substr) == 0 {
1010+
return true
1011+
}
1012+
if len(substr) > len(s) {
1013+
return false
1014+
}
1015+
1016+
// Search for substring
1017+
for i := 0; i <= len(s)-len(substr); i++ {
1018+
// Check if substring matches at position i
1019+
match := true
1020+
for j := 0; j < len(substr); j++ {
1021+
if toLower(s[i+j]) != toLower(substr[j]) {
1022+
match = false
1023+
break
1024+
}
1025+
}
1026+
if match {
1027+
return true
1028+
}
1029+
}
1030+
return false
1031+
}
1032+
1033+
// hasPrefixFoldBytes performs case-insensitive prefix check without allocation.
1034+
func hasPrefixFoldBytes(s, prefix []byte) bool {
1035+
if len(prefix) > len(s) {
1036+
return false
1037+
}
1038+
for i := 0; i < len(prefix); i++ {
1039+
if toLower(s[i]) != toLower(prefix[i]) {
1040+
return false
1041+
}
1042+
}
1043+
return true
1044+
}
1045+
9851046
// matchesStringCondition checks if a value matches a string condition.
9861047
func matchesStringCondition(value []byte, condition *pb.StringCondition) bool {
9871048
if condition == nil {
9881049
return true
9891050
}
9901051

991-
valueLower := bytes.ToLower(value)
992-
9931052
switch condition.GetCondition().(type) {
9941053
case *pb.StringCondition_Equal:
995-
target := bytes.ToLower([]byte(condition.GetEqual()))
996-
return bytes.Equal(valueLower, target)
1054+
target := []byte(condition.GetEqual())
1055+
return equalFoldBytes(value, target)
9971056
case *pb.StringCondition_NotEqual:
998-
target := bytes.ToLower([]byte(condition.GetNotEqual()))
999-
return !bytes.Equal(valueLower, target)
1057+
target := []byte(condition.GetNotEqual())
1058+
return !equalFoldBytes(value, target)
10001059
case *pb.StringCondition_Contains:
1001-
target := bytes.ToLower([]byte(condition.GetContains()))
1002-
return bytes.Contains(valueLower, target)
1060+
target := []byte(condition.GetContains())
1061+
return containsFoldBytes(value, target)
10031062
case *pb.StringCondition_NotContains:
1004-
target := bytes.ToLower([]byte(condition.GetNotContains()))
1005-
return !bytes.Contains(valueLower, target)
1063+
target := []byte(condition.GetNotContains())
1064+
return !containsFoldBytes(value, target)
10061065
case *pb.StringCondition_StartsWith:
1007-
target := bytes.ToLower([]byte(condition.GetStartsWith()))
1008-
return bytes.HasPrefix(valueLower, target)
1066+
target := []byte(condition.GetStartsWith())
1067+
return hasPrefixFoldBytes(value, target)
10091068
case *pb.StringCondition_NotStartsWith:
1010-
target := bytes.ToLower([]byte(condition.GetNotStartsWith()))
1011-
return !bytes.HasPrefix(valueLower, target)
1069+
target := []byte(condition.GetNotStartsWith())
1070+
return !hasPrefixFoldBytes(value, target)
10121071
default:
10131072
return true
10141073
}

pkg/query/string_match_bench_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
pb "github.com/parca-dev/parca/gen/proto/go/parca/query/v1alpha1"
2020
)
2121

22-
// Benchmark comparing old bytes.ToLower approach vs new zero-allocation approach
2322
func BenchmarkStringMatching(b *testing.B) {
2423
testValue := []byte("runtime.goexit")
2524
testTarget := []byte("goexit")

0 commit comments

Comments
 (0)