Skip to content

Commit 258efe0

Browse files
authored
Optimize string matching (#6121)
* query: Add benchmark for string matching
* query: Optimize away allocations in string matching

Improves performance by ~4x or more.

```
$ benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/parca-dev/parca/pkg/query
cpu: Apple M1 Max
                                                │   old.txt    │               new.txt                │
                                                │    sec/op    │    sec/op     vs base                │
StringMatching/equalFoldBytes-10                  58.745n ± 1%    2.923n ±  7%  -95.02% (p=0.000 n=10)
StringMatching/containsFoldBytes-10                69.07n ± 7%    18.68n ±  3%  -72.95% (p=0.000 n=10)
StringMatching/hasPrefixFoldBytes-10               62.72n ± 2%    10.06n ± 13%  -83.96% (p=0.000 n=10)
StringMatchingLongStrings/containsFoldBytes-10    203.30n ± 2%    55.16n ±  2%  -72.87% (p=0.000 n=10)
geomean                                            84.81n         13.19n        -84.44%
```
1 parent 67e7b1d commit 258efe0

File tree

2 files changed

+151
-14
lines changed

2 files changed

+151
-14
lines changed

pkg/query/columnquery.go

Lines changed: 73 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -982,33 +982,92 @@ func matchesFrameFilter(r *profile.RecordReader, locationIndex, lineIndex int, f
982982
return true
983983
}
984984

985+
// toLower maps an ASCII uppercase letter ('A'..'Z') to its lowercase
// counterpart and returns every other byte, including all bytes >= 0x80,
// untouched. It operates on a single byte and never allocates.
func toLower(b byte) byte {
	// Distance between an uppercase ASCII letter and its lowercase form.
	const caseOffset = 'a' - 'A'

	if b < 'A' || b > 'Z' {
		return b
	}
	return b + caseOffset
}
993+
994+
// equalFoldBytes performs case-insensitive comparison without allocation.
995+
func equalFoldBytes(s, t []byte) bool {
996+
if len(s) != len(t) {
997+
return false
998+
}
999+
for i := 0; i < len(s); i++ {
1000+
if toLower(s[i]) != toLower(t[i]) {
1001+
return false
1002+
}
1003+
}
1004+
return true
1005+
}
1006+
1007+
// containsFoldBytes performs case-insensitive contains check without allocation.
1008+
func containsFoldBytes(s, substr []byte) bool {
1009+
if len(substr) == 0 {
1010+
return true
1011+
}
1012+
if len(substr) > len(s) {
1013+
return false
1014+
}
1015+
1016+
// Search for substring
1017+
for i := 0; i <= len(s)-len(substr); i++ {
1018+
// Check if substring matches at position i
1019+
match := true
1020+
for j := 0; j < len(substr); j++ {
1021+
if toLower(s[i+j]) != toLower(substr[j]) {
1022+
match = false
1023+
break
1024+
}
1025+
}
1026+
if match {
1027+
return true
1028+
}
1029+
}
1030+
return false
1031+
}
1032+
1033+
// hasPrefixFoldBytes performs case-insensitive prefix check without allocation.
1034+
func hasPrefixFoldBytes(s, prefix []byte) bool {
1035+
if len(prefix) > len(s) {
1036+
return false
1037+
}
1038+
for i := 0; i < len(prefix); i++ {
1039+
if toLower(s[i]) != toLower(prefix[i]) {
1040+
return false
1041+
}
1042+
}
1043+
return true
1044+
}
1045+
9851046
// matchesStringCondition checks if a value matches a string condition.
9861047
func matchesStringCondition(value []byte, condition *pb.StringCondition) bool {
9871048
if condition == nil {
9881049
return true
9891050
}
9901051

991-
valueLower := bytes.ToLower(value)
992-
9931052
switch condition.GetCondition().(type) {
9941053
case *pb.StringCondition_Equal:
995-
target := bytes.ToLower([]byte(condition.GetEqual()))
996-
return bytes.Equal(valueLower, target)
1054+
target := []byte(condition.GetEqual())
1055+
return equalFoldBytes(value, target)
9971056
case *pb.StringCondition_NotEqual:
998-
target := bytes.ToLower([]byte(condition.GetNotEqual()))
999-
return !bytes.Equal(valueLower, target)
1057+
target := []byte(condition.GetNotEqual())
1058+
return !equalFoldBytes(value, target)
10001059
case *pb.StringCondition_Contains:
1001-
target := bytes.ToLower([]byte(condition.GetContains()))
1002-
return bytes.Contains(valueLower, target)
1060+
target := []byte(condition.GetContains())
1061+
return containsFoldBytes(value, target)
10031062
case *pb.StringCondition_NotContains:
1004-
target := bytes.ToLower([]byte(condition.GetNotContains()))
1005-
return !bytes.Contains(valueLower, target)
1063+
target := []byte(condition.GetNotContains())
1064+
return !containsFoldBytes(value, target)
10061065
case *pb.StringCondition_StartsWith:
1007-
target := bytes.ToLower([]byte(condition.GetStartsWith()))
1008-
return bytes.HasPrefix(valueLower, target)
1066+
target := []byte(condition.GetStartsWith())
1067+
return hasPrefixFoldBytes(value, target)
10091068
case *pb.StringCondition_NotStartsWith:
1010-
target := bytes.ToLower([]byte(condition.GetNotStartsWith()))
1011-
return !bytes.HasPrefix(valueLower, target)
1069+
target := []byte(condition.GetNotStartsWith())
1070+
return !hasPrefixFoldBytes(value, target)
10121071
default:
10131072
return true
10141073
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Copyright 2022-2025 The Parca Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package query
15+
16+
import (
17+
"testing"
18+
19+
pb "github.com/parca-dev/parca/gen/proto/go/parca/query/v1alpha1"
20+
)
21+
22+
func BenchmarkStringMatching(b *testing.B) {
23+
testValue := []byte("runtime.goexit")
24+
testTarget := []byte("goexit")
25+
26+
b.Run("equalFoldBytes", func(b *testing.B) {
27+
condition := &pb.StringCondition{
28+
Condition: &pb.StringCondition_Equal{
29+
Equal: string(testTarget),
30+
},
31+
}
32+
b.ResetTimer()
33+
for i := 0; i < b.N; i++ {
34+
_ = matchesStringCondition(testValue, condition)
35+
}
36+
})
37+
38+
b.Run("containsFoldBytes", func(b *testing.B) {
39+
condition := &pb.StringCondition{
40+
Condition: &pb.StringCondition_Contains{
41+
Contains: string(testTarget),
42+
},
43+
}
44+
b.ResetTimer()
45+
for i := 0; i < b.N; i++ {
46+
_ = matchesStringCondition(testValue, condition)
47+
}
48+
})
49+
50+
b.Run("hasPrefixFoldBytes", func(b *testing.B) {
51+
condition := &pb.StringCondition{
52+
Condition: &pb.StringCondition_StartsWith{
53+
StartsWith: "runtime",
54+
},
55+
}
56+
b.ResetTimer()
57+
for i := 0; i < b.N; i++ {
58+
_ = matchesStringCondition(testValue, condition)
59+
}
60+
})
61+
}
62+
63+
func BenchmarkStringMatchingLongStrings(b *testing.B) {
64+
testValue := []byte("github.com/parca-dev/parca/pkg/profilestore/profilestoreserver.(*ProfileStoreServer).WriteRaw")
65+
testTarget := []byte("profilestore")
66+
67+
b.Run("containsFoldBytes", func(b *testing.B) {
68+
condition := &pb.StringCondition{
69+
Condition: &pb.StringCondition_Contains{
70+
Contains: string(testTarget),
71+
},
72+
}
73+
b.ResetTimer()
74+
for i := 0; i < b.N; i++ {
75+
_ = matchesStringCondition(testValue, condition)
76+
}
77+
})
78+
}

0 commit comments

Comments
 (0)