Skip to content

Commit 871f97c

Browse files
fix: reload span_name_sanitization overrides during runtime (#6435)
* fix: reload MetricsGeneratorSpanNameSanitization overrides during runtime before this fix, we were just loading the MetricsGeneratorSpanNameSanitization of the overrides at startup if the overrides were updated (which they can without restart), we won't reload them and the MetricsGeneratorSpanNameSanitization config change in the overrides won't take effact without restart * Add CHANGELOG.md * cleanup noopSanitizer because we don't need it anymore * Add a test to ensure toggle works * replace assert with require
1 parent 805ba3b commit 871f97c

File tree

7 files changed

+105
-49
lines changed

7 files changed

+105
-49
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* [BUGFIX] Fix query_end_cutoff param for query range [#6360](https://github.com/grafana/tempo/pull/6360) (@ruslan-mikhailov)
2727
* [BUGFIX] generator: fix dimension_mappings and target_info_excluded_dimensions being unconditionally overwritten even when overrides were nil [#6390](https://github.com/grafana/tempo/pull/6390) (@carles-grafana)
2828
* [BUGFIX] generator: fix panic when `write_relabel_configs` is configured on remote write endpoints [#6396](https://github.com/grafana/tempo/pull/6396) (@carles-grafana)
29+
* [BUGFIX] fix: reload span_name_sanitization overrides during runtime [#6435](https://github.com/grafana/tempo/pull/6435) (@electron0zero)
2930

3031
### 3.0 Cleanup
3132

modules/generator/registry/builder_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
)
99

1010
func TestLabelBuilder(t *testing.T) {
11-
builder := NewLabelBuilder(0, 0, &noopSanitizer{})
11+
builder := NewLabelBuilder(0, 0, newTestDrainSanitizer(SpanNameSanitizationDisabled))
1212
builder.Add("name", "value")
1313
lbls, ok := builder.CloseAndBuildLabels()
1414

@@ -23,7 +23,7 @@ func TestLabelBuilder(t *testing.T) {
2323
}
2424

2525
func TestLabelBuilder_MaxLabelNameLength(t *testing.T) {
26-
builder := NewLabelBuilder(10, 10, &noopSanitizer{})
26+
builder := NewLabelBuilder(10, 10, newTestDrainSanitizer(SpanNameSanitizationDisabled))
2727
builder.Add("name", "very_long_value")
2828
builder.Add("very_long_name", "value")
2929

@@ -34,7 +34,7 @@ func TestLabelBuilder_MaxLabelNameLength(t *testing.T) {
3434
}
3535

3636
func TestLabelBuilder_InvalidUTF8(t *testing.T) {
37-
builder := NewLabelBuilder(0, 0, &noopSanitizer{})
37+
builder := NewLabelBuilder(0, 0, newTestDrainSanitizer(SpanNameSanitizationDisabled))
3838
builder.Add("name", "svc-\xc3\x28") // Invalid UTF-8
3939

4040
_, ok := builder.CloseAndBuildLabels()

modules/generator/registry/drain_sanitizer.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,17 @@ var (
2525
}, []string{"tenant"})
2626
)
2727

28+
// sanitizeModeFunc returns the current span name sanitization mode for the tenant.
29+
type sanitizeModeFunc func(tenant string) string
30+
2831
type DrainSanitizer struct {
2932
mtx sync.Mutex
3033
drain *drain.Drain
31-
dryRun bool
3234
demand *Cardinality
3335

36+
tenant string
37+
sanitizeModeF sanitizeModeFunc
38+
3439
metricTotalSpansSanitized prometheus.Counter
3540
demandGauge prometheus.Gauge
3641

@@ -41,10 +46,11 @@ type DrainSanitizer struct {
4146
pruneChan <-chan time.Time
4247
}
4348

44-
func NewDrainSanitizer(tenant string, dryRun bool, staleDuration time.Duration) *DrainSanitizer {
49+
func NewDrainSanitizer(tenant string, sanitizeModeF sanitizeModeFunc, staleDuration time.Duration) *DrainSanitizer {
4550
return &DrainSanitizer{
4651
drain: drain.New(tenant, drain.DefaultConfig()),
47-
dryRun: dryRun,
52+
tenant: tenant,
53+
sanitizeModeF: sanitizeModeF,
4854
metricTotalSpansSanitized: metricTotalSpansSanitized.WithLabelValues(tenant),
4955
demand: NewCardinality(staleDuration, removeStaleSeriesInterval),
5056
demandGauge: metricPostSanitizationDemand.WithLabelValues(tenant),
@@ -54,6 +60,13 @@ func NewDrainSanitizer(tenant string, dryRun bool, staleDuration time.Duration)
5460
}
5561

5662
func (s *DrainSanitizer) Sanitize(lbls labels.Labels) labels.Labels {
63+
// Check the override at runtime so that changes to the sanitization mode override
64+
// take effect without restarting the generator.
65+
mode := s.sanitizeModeF(s.tenant)
66+
if mode == SpanNameSanitizationDisabled {
67+
return lbls
68+
}
69+
5770
s.doPeriodicMaintenance()
5871

5972
s.mtx.Lock()
@@ -84,7 +97,8 @@ func (s *DrainSanitizer) Sanitize(lbls labels.Labels) labels.Labels {
8497
newLabelHash := newLbls.Hash()
8598
s.demand.Insert(newLabelHash)
8699

87-
if s.dryRun {
100+
// in dry-run mode, return the labels without modifying but capture metrics
101+
if mode == SpanNameSanitizationDryRun {
88102
return lbls
89103
}
90104

modules/generator/registry/drain_sanitizer_test.go

Lines changed: 77 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@ import (
66
"time"
77

88
"github.com/prometheus/prometheus/model/labels"
9-
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
1010
)
1111

12+
func newTestDrainSanitizer(mode string) *DrainSanitizer {
13+
return NewDrainSanitizer("test-tenant", func(string) string { return mode }, 15*time.Minute)
14+
}
15+
1216
func TestDrainSanitizer_PatternDetection(t *testing.T) {
1317
t.Parallel()
1418

15-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
19+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
1620

1721
// Train with similar span names that should form a pattern
1822
lbls1 := labels.FromStrings("span_name", "GET /api/users/123", "service", "api")
@@ -21,25 +25,25 @@ func TestDrainSanitizer_PatternDetection(t *testing.T) {
2125

2226
// First call should return original (no pattern yet)
2327
result1 := sanitizer.Sanitize(lbls1)
24-
assert.Equal(t, "GET /api/users/123", result1.Get("span_name"))
28+
require.Equal(t, "GET /api/users/123", result1.Get("span_name"))
2529

2630
// After training, subsequent similar spans should be sanitized
2731
result2 := sanitizer.Sanitize(lbls2)
2832
result3 := sanitizer.Sanitize(lbls3)
2933

3034
// All should have the same sanitized span_name pattern
31-
assert.Equal(t, result2.Get("span_name"), result3.Get("span_name"))
35+
require.Equal(t, result2.Get("span_name"), result3.Get("span_name"))
3236
// Pattern should contain the parameter marker
33-
assert.Contains(t, result2.Get("span_name"), "<_>")
37+
require.Contains(t, result2.Get("span_name"), "<_>")
3438
// Original labels should be preserved
35-
assert.Equal(t, "api", result2.Get("service"))
36-
assert.Equal(t, "api", result3.Get("service"))
39+
require.Equal(t, "api", result2.Get("service"))
40+
require.Equal(t, "api", result3.Get("service"))
3741
}
3842

3943
func TestDrainSanitizer_DryRunMode(t *testing.T) {
4044
t.Parallel()
4145

42-
sanitizer := NewDrainSanitizer("test-tenant", true, 15*time.Minute) // dryRun = true
46+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationDryRun)
4347

4448
lbls1 := labels.FromStrings("span_name", "GET /api/users/123", "service", "api")
4549
lbls2 := labels.FromStrings("span_name", "GET /api/users/456", "service", "api")
@@ -49,29 +53,75 @@ func TestDrainSanitizer_DryRunMode(t *testing.T) {
4953

5054
// In dry-run mode, even if pattern is detected, return original labels
5155
result := sanitizer.Sanitize(lbls2)
52-
assert.Equal(t, "GET /api/users/456", result.Get("span_name"))
53-
assert.Equal(t, lbls2, result)
56+
require.Equal(t, "GET /api/users/456", result.Get("span_name"))
57+
require.Equal(t, lbls2, result)
58+
}
59+
60+
func TestDrainSanitizer_RuntimeModeToggle(t *testing.T) {
61+
t.Parallel()
62+
63+
mode := SpanNameSanitizationEnabled
64+
sanitizer := NewDrainSanitizer("test-tenant", func(string) string { return mode }, 15*time.Minute)
65+
66+
// Train the drain tree with similar span names to establish a pattern
67+
sanitizer.Sanitize(labels.FromStrings("span_name", "GET /api/users/123"))
68+
sanitizer.Sanitize(labels.FromStrings("span_name", "GET /api/users/456"))
69+
70+
// With enabled mode, should sanitize
71+
result := sanitizer.Sanitize(labels.FromStrings("span_name", "GET /api/users/789"))
72+
require.Contains(t, result.Get("span_name"), "<_>")
73+
74+
// Toggle to disabled at runtime - should pass through
75+
mode = SpanNameSanitizationDisabled
76+
result = sanitizer.Sanitize(labels.FromStrings("span_name", "GET /api/users/999"))
77+
require.Equal(t, "GET /api/users/999", result.Get("span_name"))
78+
79+
// Toggle to dry-run at runtime - should pass through but still train
80+
mode = SpanNameSanitizationDryRun
81+
result = sanitizer.Sanitize(labels.FromStrings("span_name", "GET /api/users/111"))
82+
require.Equal(t, "GET /api/users/111", result.Get("span_name"))
83+
84+
// Toggle back to enabled - should sanitize again
85+
mode = SpanNameSanitizationEnabled
86+
result = sanitizer.Sanitize(labels.FromStrings("span_name", "GET /api/users/222"))
87+
require.Contains(t, result.Get("span_name"), "<_>")
88+
}
89+
90+
func TestDrainSanitizer_DisabledMode(t *testing.T) {
91+
t.Parallel()
92+
93+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationDisabled)
94+
95+
lbls1 := labels.FromStrings("span_name", "GET /api/users/123", "service", "api")
96+
lbls2 := labels.FromStrings("span_name", "GET /api/users/456", "service", "api")
97+
98+
sanitizer.Sanitize(lbls1)
99+
100+
// When disabled, should always return original labels
101+
result := sanitizer.Sanitize(lbls2)
102+
require.Equal(t, "GET /api/users/456", result.Get("span_name"))
103+
require.Equal(t, lbls2, result)
54104
}
55105

56106
func TestDrainSanitizer_NilClusterHandling(t *testing.T) {
57107
t.Parallel()
58108

59-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
109+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
60110

61111
// Span name with too few tokens (less than MinTokens=3)
62112
// Tokenizer will produce tokens like ["a", "<END>"] which is < 3
63113
lbls := labels.FromStrings("span_name", "a", "service", "api")
64114
result := sanitizer.Sanitize(lbls)
65115

66116
// Should return original labels when cluster is nil
67-
assert.Equal(t, "a", result.Get("span_name"))
68-
assert.Equal(t, lbls, result)
117+
require.Equal(t, "a", result.Get("span_name"))
118+
require.Equal(t, lbls, result)
69119
}
70120

71121
func TestDrainSanitizer_ConcurrentAccess(t *testing.T) {
72122
t.Parallel()
73123

74-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
124+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
75125

76126
var wg sync.WaitGroup
77127
numGoroutines := 10
@@ -85,8 +135,8 @@ func TestDrainSanitizer_ConcurrentAccess(t *testing.T) {
85135
lbls := labels.FromStrings("span_name", "GET /api/users/123", "id", string(rune(id*1000+j)))
86136
result := sanitizer.Sanitize(lbls)
87137
// Should always return valid labels
88-
assert.NotNil(t, result)
89-
assert.NotEmpty(t, result.Get("span_name"))
138+
require.NotNil(t, result)
139+
require.NotEmpty(t, result.Get("span_name"))
90140
}
91141
}(i)
92142
}
@@ -98,7 +148,7 @@ func TestDrainSanitizer_ConcurrentAccess(t *testing.T) {
98148
func TestDrainSanitizer_DemandTracking(t *testing.T) {
99149
t.Parallel()
100150

101-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
151+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
102152

103153
// Create labels with different span names
104154
lbls1 := labels.FromStrings("span_name", "GET /api/users/123")
@@ -114,42 +164,42 @@ func TestDrainSanitizer_DemandTracking(t *testing.T) {
114164
// but we can verify the sanitizer doesn't crash and processes all labels)
115165
// The demand gauge will be updated periodically via doPeriodicMaintenance
116166
demandEstimate := sanitizer.demand.Estimate()
117-
assert.GreaterOrEqual(t, demandEstimate, uint64(1))
167+
require.GreaterOrEqual(t, demandEstimate, uint64(1))
118168
}
119169

120170
func TestDrainSanitizer_NoSpanNameLabel(t *testing.T) {
121171
t.Parallel()
122172

123-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
173+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
124174

125175
// Labels without span_name
126176
lbls := labels.FromStrings("service", "api", "method", "GET")
127177
result := sanitizer.Sanitize(lbls)
128178

129179
// Should return original labels (span_name is empty string, which drain will reject)
130-
assert.Equal(t, lbls, result)
180+
require.Equal(t, lbls, result)
131181
}
132182

133183
func TestDrainSanitizer_PatternBeforeSanitization(t *testing.T) {
134184
t.Parallel()
135185

136-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
186+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
137187

138188
// First span name - no pattern yet, so returns original
139189
lbls1 := labels.FromStrings("span_name", "GET /api/users/123")
140190
result1 := sanitizer.Sanitize(lbls1)
141-
assert.Equal(t, "GET /api/users/123", result1.Get("span_name"))
191+
require.Equal(t, "GET /api/users/123", result1.Get("span_name"))
142192

143193
// Same span name again - still no pattern (only one instance)
144194
result2 := sanitizer.Sanitize(lbls1)
145-
assert.Equal(t, "GET /api/users/123", result2.Get("span_name"))
195+
require.Equal(t, "GET /api/users/123", result2.Get("span_name"))
146196

147197
// Different but similar span name - now pattern should emerge
148198
lbls2 := labels.FromStrings("span_name", "GET /api/users/456")
149199
result3 := sanitizer.Sanitize(lbls2)
150200
// After pattern detection, should return sanitized version
151-
assert.NotEqual(t, "GET /api/users/456", result3.Get("span_name"))
152-
assert.Contains(t, result3.Get("span_name"), "<_>")
201+
require.NotEqual(t, "GET /api/users/456", result3.Get("span_name"))
202+
require.Contains(t, result3.Get("span_name"), "<_>")
153203
}
154204

155205
func TestDrainSanitizer_FullSanitizedOutput(t *testing.T) {
@@ -185,7 +235,7 @@ func TestDrainSanitizer_FullSanitizedOutput(t *testing.T) {
185235
for _, tc := range testCases {
186236
t.Run(tc.name, func(t *testing.T) {
187237
t.Parallel()
188-
sanitizer := NewDrainSanitizer("test-tenant", false, 15*time.Minute)
238+
sanitizer := newTestDrainSanitizer(SpanNameSanitizationEnabled)
189239

190240
// Train with first inputs
191241
for _, input := range tc.inputs[:len(tc.inputs)-1] {
@@ -195,7 +245,7 @@ func TestDrainSanitizer_FullSanitizedOutput(t *testing.T) {
195245
// Last input should produce the expected sanitized output
196246
lastInput := tc.inputs[len(tc.inputs)-1]
197247
result := sanitizer.Sanitize(labels.FromStrings("span_name", lastInput))
198-
assert.Equal(t, tc.expectedOutput, result.Get("span_name"))
248+
require.Equal(t, tc.expectedOutput, result.Get("span_name"))
199249
})
200250
}
201251
}

modules/generator/registry/registry.go

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -98,19 +98,11 @@ type Limiter interface {
9898
OnDelete(labelHash uint64, seriesCount uint32)
9999
}
100100

101-
// Sanitizer is applies a transformation to all non-constant labels.
101+
// Sanitizer applies a transformation to all non-constant labels.
102102
type Sanitizer interface {
103103
Sanitize(lbls labels.Labels) labels.Labels
104104
}
105105

106-
type noopSanitizer struct{}
107-
108-
var _ Sanitizer = (*noopSanitizer)(nil)
109-
110-
func (s *noopSanitizer) Sanitize(lbls labels.Labels) labels.Labels {
111-
return lbls
112-
}
113-
114106
// New creates a ManagedRegistry. This Registry will scrape itself, write samples into an appender
115107
// and remove stale series.
116108
func New(cfg *Config, overrides Overrides, tenant string, appendable storage.Appendable, logger log.Logger, limiter Limiter) *ManagedRegistry {
@@ -127,10 +119,7 @@ func New(cfg *Config, overrides Overrides, tenant string, appendable storage.App
127119
externalLabels[cfg.InjectTenantIDAs] = tenant
128120
}
129121

130-
var sanitizer Sanitizer = &noopSanitizer{}
131-
if sanitizationMode := overrides.MetricsGeneratorSpanNameSanitization(tenant); sanitizationMode != SpanNameSanitizationDisabled {
132-
sanitizer = NewDrainSanitizer(tenant, sanitizationMode == SpanNameSanitizationDryRun, cfg.StaleDuration)
133-
}
122+
sanitizer := NewDrainSanitizer(tenant, overrides.MetricsGeneratorSpanNameSanitization, cfg.StaleDuration)
134123

135124
r := &ManagedRegistry{
136125
onShutdown: cancel,

modules/generator/registry/registry_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (m *mockLimiter) OnPruneStaleSeries() {
5757
}
5858

5959
func buildTestLabels(names []string, values []string) labels.Labels {
60-
builder := NewLabelBuilder(0, 0, &noopSanitizer{})
60+
builder := NewLabelBuilder(0, 0, newTestDrainSanitizer(SpanNameSanitizationDisabled))
6161
for i := range names {
6262
builder.Add(names[i], values[i])
6363
}

modules/generator/registry/test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ func (t *TestRegistry) NewGauge(name string) Gauge {
3939
}
4040

4141
func (t *TestRegistry) NewLabelBuilder() LabelBuilder {
42-
return NewLabelBuilder(0, 0, &noopSanitizer{})
42+
nds := NewDrainSanitizer("test", func(string) string { return SpanNameSanitizationDisabled }, 0)
43+
44+
return NewLabelBuilder(0, 0, nds)
4345
}
4446

4547
func (t *TestRegistry) NewHistogram(name string, buckets []float64, histogramOverrides HistogramMode) Histogram {

0 commit comments

Comments
 (0)