Skip to content

Commit 76a7e73

Browse files
authored
fix issue in pprof split when using relabel.LabelDrop (#4365)
* fix broken pprof split when using relabel.LabelDrop
* Add benchmark
* Add test
1 parent c2e798e commit 76a7e73

File tree

2 files changed

+112
-12
lines changed

2 files changed

+112
-12
lines changed

pkg/model/pprofsplit/pprof_split.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,13 @@ func (g *lazyGroup) addSampleGroup(stringTable []string, sg pprof.SampleGroup) {
174174
}
175175

176176
type groupsWithFingerprints struct {
177-
m map[uint64][]lazyGroup
177+
m map[uint64][]*lazyGroup
178178
order []uint64
179179
}
180180

181181
func newGroupsWithFingerprints() *groupsWithFingerprints {
182182
return &groupsWithFingerprints{
183-
m: make(map[uint64][]lazyGroup),
183+
m: make(map[uint64][]*lazyGroup),
184184
}
185185
}
186186

@@ -201,7 +201,7 @@ func (g *groupsWithFingerprints) add(stringTable []string, lbls phlaremodel.Labe
201201
}
202202

203203
// add the labels to the list
204-
g.m[fp] = append(g.m[fp], lazyGroup{
204+
g.m[fp] = append(g.m[fp], &lazyGroup{
205205
sampleGroup: group,
206206
labels: lbls,
207207
})

pkg/model/pprofsplit/pprof_split_test.go

Lines changed: 109 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package pprofsplit
22

33
import (
4+
"fmt"
5+
"math/rand"
46
"testing"
57

68
"github.com/prometheus/common/model"
@@ -350,22 +352,72 @@ func Test_VisitSampleSeries(t *testing.T) {
350352
},
351353
},
352354
},
355+
{
356+
description: "does not drop samples when a label is dropped",
357+
rules: []*relabel.Config{
358+
{
359+
Action: relabel.LabelDrop,
360+
Regex: relabel.MustNewRegexp("^label_to_drop$"),
361+
},
362+
},
363+
labels: []*typesv1.LabelPair{},
364+
profile: &profilev1.Profile{
365+
StringTable: []string{"", "label_to_drop", "value_1", "value_2"},
366+
Sample: []*profilev1.Sample{
367+
{
368+
LocationId: []uint64{1, 2},
369+
Value: []int64{2},
370+
Label: []*profilev1.Label{{Key: 1, Str: 2}},
371+
},
372+
{
373+
LocationId: []uint64{1, 3},
374+
Value: []int64{2},
375+
Label: []*profilev1.Label{{Key: 1, Str: 2}},
376+
},
377+
{
378+
LocationId: []uint64{1, 3},
379+
Value: []int64{2},
380+
Label: []*profilev1.Label{{Key: 1, Str: 3}}, // will get merged with the previous one
381+
},
382+
{
383+
LocationId: []uint64{1, 4},
384+
Value: []int64{2},
385+
Label: []*profilev1.Label{{Key: 1, Str: 3}},
386+
},
387+
},
388+
},
389+
expectProfilesDropped: 0,
390+
expectBytesDropped: 0,
391+
expected: []sampleSeries{
392+
{
393+
labels: []*typesv1.LabelPair{},
394+
samples: []*profilev1.Sample{
395+
{
396+
LocationId: []uint64{1, 2},
397+
Label: []*profilev1.Label{},
398+
Value: []int64{2},
399+
},
400+
{
401+
LocationId: []uint64{1, 3},
402+
Label: []*profilev1.Label{},
403+
Value: []int64{4},
404+
},
405+
{
406+
LocationId: []uint64{1, 4},
407+
Label: []*profilev1.Label{},
408+
Value: []int64{2},
409+
},
410+
},
411+
},
412+
},
413+
},
353414
{
354415
description: "ensure only samples of same stacktraces get grouped",
355416
labels: []*typesv1.LabelPair{
356417
{Name: "__name__", Value: "profile"},
357418
},
358419
profile: &profilev1.Profile{
359420
StringTable: []string{"", "foo", "bar", "binary", "span_id", "aaaabbbbccccdddd", "__name__"},
360-
Location: []*profilev1.Location{
361-
{Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}},
362-
{Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 2}}},
363-
},
364-
Mapping: []*profilev1.Mapping{{}, {Id: 1, Filename: 3}},
365-
Function: []*profilev1.Function{
366-
{Id: 1, Name: 1},
367-
{Id: 2, Name: 2},
368-
},
369421
Sample: []*profilev1.Sample{
370422
{
371423
LocationId: []uint64{1, 2},
@@ -456,3 +508,51 @@ func Test_VisitSampleSeries(t *testing.T) {
456508
})
457509
}
458510
}
511+
512+
func Benchmark_VisitSampleSeries_HighCardinality(b *testing.B) {
513+
defaultRelabelConfigs := validation.MockDefaultOverrides().IngestionRelabelingRules("")
514+
defaultRelabelConfigs = append(defaultRelabelConfigs, &relabel.Config{
515+
Action: relabel.LabelDrop,
516+
Regex: relabel.MustNewRegexp("^high_cardinality_label$"),
517+
})
518+
519+
stringTable := []string{"", "foo", "bar", "binary", "span_id", "aaaabbbbccccdddd", "high_cardinality_label"}
520+
highCardinalityOffset := int64(len(stringTable))
521+
for i := 0; i < 10000; i++ {
522+
stringTable = append(stringTable, fmt.Sprintf("value_%d", i))
523+
}
524+
525+
profile := &profilev1.Profile{
526+
StringTable: stringTable,
527+
Location: []*profilev1.Location{{Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}}},
528+
Mapping: []*profilev1.Mapping{{}, {Id: 1, Filename: 3}},
529+
Function: []*profilev1.Function{{Id: 1, Name: 1}},
530+
}
531+
532+
for i := 0; i < 30000; i++ {
533+
labelValue := highCardinalityOffset + int64(i/10)
534+
if rand.Float64() < 0.3 {
535+
labelValue = highCardinalityOffset - 2 // lower the cardinality to create large groups
536+
}
537+
labels := []*profilev1.Label{
538+
{Key: highCardinalityOffset - 1, Str: labelValue},
539+
}
540+
profile.Sample = append(profile.Sample, &profilev1.Sample{
541+
LocationId: []uint64{uint64(i + 1)},
542+
Value: []int64{2},
543+
Label: labels,
544+
})
545+
}
546+
547+
b.ResetTimer()
548+
b.ReportAllocs()
549+
550+
for i := 0; i < b.N; i++ {
551+
visitor := new(mockVisitor)
552+
err := VisitSampleSeries(profile, []*typesv1.LabelPair{
553+
{Name: "__name__", Value: "profile"},
554+
{Name: "foo", Value: "bar"},
555+
}, defaultRelabelConfigs, visitor)
556+
require.NoError(b, err)
557+
}
558+
}

0 commit comments

Comments
 (0)