Skip to content

Commit 0b02657

Browse files
authored
Improve distributor sampling rule evaluation (#4347)
* Improve distributor sampling rule evaluation * Improve wording * Switch to strings.Contains and add test
1 parent 21136ba commit 0b02657

File tree

4 files changed

+120
-27
lines changed

4 files changed

+120
-27
lines changed

pkg/distributor/distributor.go

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -349,8 +349,12 @@ func (d *Distributor) PushParsed(ctx context.Context, req *distributormodel.Push
349349
return nil, err
350350
}
351351

352-
if sample := d.shouldSample(tenantID, groups.Names()); !sample {
353-
level.Debug(logger).Log("msg", "skipping push request due to sampling", "tenant", tenantID)
352+
if sample, usageGroup := d.shouldSample(tenantID, groups.Names()); !sample {
353+
level.Debug(logger).Log(
354+
"msg", "skipping push request due to sampling",
355+
"tenant", tenantID,
356+
"usage_group", usageGroup,
357+
)
354358
validation.DiscardedProfiles.WithLabelValues(string(validation.SkippedBySamplingRules), tenantID).Add(float64(req.TotalProfiles))
355359
validation.DiscardedBytes.WithLabelValues(string(validation.SkippedBySamplingRules), tenantID).Add(float64(req.TotalBytesUncompressed))
356360
groups.CountDiscardedBytes(string(validation.SkippedBySamplingRules), req.TotalBytesUncompressed)
@@ -902,38 +906,39 @@ func (d *Distributor) checkUsageGroupsIngestLimit(req *distributormodel.PushRequ
902906
return nil
903907
}
904908

905-
func (d *Distributor) shouldSample(tenantID string, groupsInRequest []validation.UsageGroupMatchName) bool {
909+
// shouldSample returns true if the profile should be injected and optionally the usage group that was responsible for the decision.
910+
func (d *Distributor) shouldSample(tenantID string, groupsInRequest []validation.UsageGroupMatchName) (bool, *validation.UsageGroupMatchName) {
906911
l := d.limits.DistributorSampling(tenantID)
907912
if l == nil {
908-
return true
913+
return true, nil
909914
}
910915

911-
// Determine the minimum probability among all matching usage groups.
912-
minProb := 1.0
913-
matched := false
916+
samplingProbability := 1.0
917+
var match *validation.UsageGroupMatchName
914918
for _, group := range groupsInRequest {
915-
if probCfg, ok := l.UsageGroups[group.ResolvedName]; ok {
916-
matched = true
917-
if probCfg.Probability < minProb {
918-
minProb = probCfg.Probability
919-
}
919+
probabilityCfg, found := l.UsageGroups[group.ConfiguredName]
920+
if !found {
921+
probabilityCfg, found = l.UsageGroups[group.ResolvedName]
922+
}
923+
if !found {
920924
continue
921925
}
922-
if probCfg, ok := l.UsageGroups[group.ConfiguredName]; ok {
923-
matched = true
924-
if probCfg.Probability < minProb {
925-
minProb = probCfg.Probability
926-
}
926+
// a less specific group loses to a more specific one
927+
if match != nil && match.IsMoreSpecificThan(&group) {
928+
continue
929+
}
930+
// lower probability wins; when tied, the more specific group wins
931+
if probabilityCfg.Probability <= samplingProbability {
932+
samplingProbability = probabilityCfg.Probability
933+
match = &group
927934
}
928935
}
929936

930-
// If no sampling rules matched, accept the request.
931-
if !matched {
932-
return true
937+
if match == nil {
938+
return true, nil
933939
}
934940

935-
// Sample once using the minimum probability.
936-
return rand.Float64() <= minProb
941+
return rand.Float64() <= samplingProbability, match
937942
}
938943

939944
type profileTracker struct {

pkg/distributor/distributor_test.go

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2565,6 +2565,7 @@ func TestDistributor_shouldSample(t *testing.T) {
25652565
groups []validation.UsageGroupMatchName
25662566
samplingConfig *sampling.Config
25672567
expected bool
2568+
expectedMatch *validation.UsageGroupMatchName
25682569
}{
25692570
{
25702571
name: "no sampling config - should accept",
@@ -2593,6 +2594,10 @@ func TestDistributor_shouldSample(t *testing.T) {
25932594
},
25942595
},
25952596
expected: true,
2597+
expectedMatch: &validation.UsageGroupMatchName{
2598+
ConfiguredName: "group1",
2599+
ResolvedName: "group1",
2600+
},
25962601
},
25972602
{
25982603
name: "matching group with dynamic name - should accept",
@@ -2604,6 +2609,10 @@ func TestDistributor_shouldSample(t *testing.T) {
26042609
},
26052610
},
26062611
expected: true,
2612+
expectedMatch: &validation.UsageGroupMatchName{
2613+
ConfiguredName: "configured-name",
2614+
ResolvedName: "resolved-name",
2615+
},
26072616
},
26082617
{
26092618
name: "matching group with 0.0 probability - should reject",
@@ -2615,6 +2624,10 @@ func TestDistributor_shouldSample(t *testing.T) {
26152624
},
26162625
},
26172626
expected: false,
2627+
expectedMatch: &validation.UsageGroupMatchName{
2628+
ConfiguredName: "group1",
2629+
ResolvedName: "group1",
2630+
},
26182631
},
26192632
{
26202633
name: "multiple matching groups - should use minimum probability",
@@ -2630,6 +2643,10 @@ func TestDistributor_shouldSample(t *testing.T) {
26302643
},
26312644
},
26322645
expected: false,
2646+
expectedMatch: &validation.UsageGroupMatchName{
2647+
ConfiguredName: "group2",
2648+
ResolvedName: "group2",
2649+
},
26332650
},
26342651
{
26352652
name: "multiple matching groups - should prioritize specific group",
@@ -2641,10 +2658,33 @@ func TestDistributor_shouldSample(t *testing.T) {
26412658
samplingConfig: &sampling.Config{
26422659
UsageGroups: map[string]sampling.UsageGroupSampling{
26432660
"${labels.service_name}": {Probability: 1.0},
2644-
"test_service": {Probability: 0.0},
2661+
"test_service": {Probability: 1.0},
26452662
},
26462663
},
2647-
expected: false,
2664+
expected: true,
2665+
expectedMatch: &validation.UsageGroupMatchName{
2666+
ConfiguredName: "test_service",
2667+
ResolvedName: "test_service",
2668+
},
2669+
},
2670+
{
2671+
name: "multiple matching groups - should prioritize specific group (reversed order)",
2672+
tenantID: "test-tenant",
2673+
groups: []validation.UsageGroupMatchName{
2674+
{ConfiguredName: "test_service", ResolvedName: "test_service"},
2675+
{ConfiguredName: "${labels.service_name}", ResolvedName: "test_service"},
2676+
},
2677+
samplingConfig: &sampling.Config{
2678+
UsageGroups: map[string]sampling.UsageGroupSampling{
2679+
"${labels.service_name}": {Probability: 1.0},
2680+
"test_service": {Probability: 1.0},
2681+
},
2682+
},
2683+
expected: true,
2684+
expectedMatch: &validation.UsageGroupMatchName{
2685+
ConfiguredName: "test_service",
2686+
ResolvedName: "test_service",
2687+
},
26482688
},
26492689
}
26502690

@@ -2659,8 +2699,9 @@ func TestDistributor_shouldSample(t *testing.T) {
26592699
limits: overrides,
26602700
}
26612701

2662-
result := d.shouldSample(tt.tenantID, tt.groups)
2663-
assert.Equal(t, tt.expected, result, "shouldSample should return consistent results")
2702+
sample, match := d.shouldSample(tt.tenantID, tt.groups)
2703+
assert.Equal(t, tt.expected, sample)
2704+
assert.Equal(t, tt.expectedMatch, match)
26642705
})
26652706
}
26662707
}
@@ -2707,7 +2748,7 @@ func TestDistributor_shouldSample_Probability(t *testing.T) {
27072748

27082749
accepted := 0
27092750
for i := 0; i < iterations; i++ {
2710-
if d.shouldSample(tenantID, groups) {
2751+
if s, _ := d.shouldSample(tenantID, groups); s {
27112752
accepted++
27122753
}
27132754
}

pkg/validation/usage_groups.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,14 @@ type UsageGroupMatchName struct {
169169
ResolvedName string
170170
}
171171

172+
func (m *UsageGroupMatchName) IsMoreSpecificThan(other *UsageGroupMatchName) bool {
173+
return !strings.Contains(m.ConfiguredName, dynamicLabelNamePrefix) && strings.Contains(other.ConfiguredName, dynamicLabelNamePrefix)
174+
}
175+
176+
func (m *UsageGroupMatchName) String() string {
177+
return fmt.Sprintf("{configured: %s, resolved: %s}", m.ConfiguredName, m.ResolvedName)
178+
}
179+
172180
func (m UsageGroupMatch) CountReceivedBytes(profileType string, n int64) {
173181
if len(m.names) == 0 {
174182
usageGroupReceivedDecompressedBytes.WithLabelValues(profileType, m.tenantID, noMatchName).Add(float64(n))

pkg/validation/usage_groups_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,3 +584,42 @@ func testMustParseMatcher(t *testing.T, s string) []*labels.Matcher {
584584
require.NoError(t, err)
585585
return m
586586
}
587+
588+
func TestUsageGroupMatchName_IsMoreSpecificThan(t *testing.T) {
589+
tests := []struct {
590+
Name string
591+
Match UsageGroupMatchName
592+
Other UsageGroupMatchName
593+
Want bool
594+
}{
595+
{
596+
Name: "same name",
597+
Match: UsageGroupMatchName{ConfiguredName: "app/foo"},
598+
Other: UsageGroupMatchName{ConfiguredName: "app/foo"},
599+
Want: false,
600+
},
601+
{
602+
Name: "less specific name",
603+
Match: UsageGroupMatchName{ConfiguredName: "${labels.service_name}"},
604+
Other: UsageGroupMatchName{ConfiguredName: "test-service"},
605+
Want: false,
606+
},
607+
{
608+
Name: "more specific name",
609+
Match: UsageGroupMatchName{ConfiguredName: "test-service"},
610+
Other: UsageGroupMatchName{ConfiguredName: "${labels.service_name}"},
611+
Want: true,
612+
},
613+
{
614+
Name: "more specific name with prefix",
615+
Match: UsageGroupMatchName{ConfiguredName: "test-service"},
616+
Other: UsageGroupMatchName{ConfiguredName: "service/${labels.service_name}"},
617+
Want: true,
618+
},
619+
}
620+
for _, tt := range tests {
621+
t.Run(tt.Name, func(t *testing.T) {
622+
require.Equal(t, tt.Want, tt.Match.IsMoreSpecificThan(&tt.Other))
623+
})
624+
}
625+
}

0 commit comments

Comments
 (0)