Skip to content

Commit 35d1d71

Browse files
Add span_name_sanitization as user-configurable override (#6411)
* Add span_name_sanitization as user-configurable override

  This adds support for configuring span_name_sanitization through the user-configurable overrides API, allowing tenants to enable the DRAIN span name sanitizer without requiring operator configuration changes.

  Changes:
  - Add SpanNameSanitization field to LimitsMetricsGenerator
  - Add MetricsGeneratorSpanNameSanitization() to overrides manager
  - Add validation for span_name_sanitization in overrides API
  - Add comprehensive tests for the new configuration option
  - Add integration test for the overrides API endpoint

  Co-authored-by: Cursor <cursoragent@cursor.com>

* Add PR link

* Add documentation for span_name_sanitization override

  Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 1722997 commit 35d1d71

File tree

12 files changed

+277
-0
lines changed

12 files changed

+277
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* [ENHANCEMENT] Improved live store readiness check and added `readiness_target_lag` and `readiness_max_wait` config parameters. Live store will now - if `readiness_target_lag` is set - not report `/ready` until Kafka lag is brought under the specified value [#6238](https://github.com/grafana/tempo/pull/6238) [#6405](https://github.com/grafana/tempo/pull/6405) (@oleg-kozlyuk-grafana, @ruslan-mikhailov)
1616
* [ENHANCEMENT] Expose a new histogram metric to track the jobs per query distribution [#6343](https://github.com/grafana/tempo/pull/6343) (@javiermolinar)
1717
* [ENHANCEMENT] Do deep validation for filter policies in user configurable overrides API [#6407](https://github.com/grafana/tempo/pull/6407) (@electron0zero)
18+
* [ENHANCEMENT] Allow `span_name_sanitization` to be set via the user-configurable overrides API [#6411](https://github.com/grafana/tempo/pull/6411) (@Logiraptor)
1819
* [ENHANCEMENT] Add `fail_on_high_lag` parameter to allow live-store to fail if it is lagged [#6363](https://github.com/grafana/tempo/pull/6363) (@ruslan-mikhailov)
1920
* [ENHANCEMENT] Add new metric for generator ring size: `tempo_distributor_metrics_generator_tenant_ring_size` [#5686](https://github.com/grafana/tempo/pull/5686) (@zalegrala)
2021
* [BUGFIX] Fix query-frontend unable to convert dedicated column blob option [#6377](https://github.com/grafana/tempo/pull/6377) (@stoewer)

cmd/tempo/app/overrides_validation.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ func (r *runtimeConfigValidator) Validate(config *overrides.Overrides) (warnings
3636
}
3737
}
3838

39+
if config.MetricsGenerator.SpanNameSanitization != "" {
40+
if err := validation.ValidateSpanNameSanitization(config.MetricsGenerator.SpanNameSanitization); err != nil {
41+
return warnings, err
42+
}
43+
}
44+
3945
if config.MetricsGenerator.NativeHistogramBucketFactor != 0 {
4046
if err := validation.ValidateNativeHistogramBucketFactor(config.MetricsGenerator.NativeHistogramBucketFactor); err != nil {
4147
return warnings, err
@@ -170,6 +176,12 @@ func (v *overridesValidator) Validate(limits *client.Limits) error {
170176
}
171177
}
172178

179+
if spanNameSanitization, ok := limits.GetMetricsGenerator().GetSpanNameSanitization(); ok {
180+
if err := validation.ValidateSpanNameSanitization(spanNameSanitization); err != nil {
181+
return err
182+
}
183+
}
184+
173185
if metricName, ok := limits.GetMetricsGenerator().GetProcessor().GetHostInfo().GetMetricName(); ok {
174186
if err := validation.ValidateHostInfoMetricName(metricName); err != nil {
175187
return err

cmd/tempo/app/overrides_validation_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,35 @@ func Test_runtimeOverridesValidator(t *testing.T) {
246246
backend.WarnTooManyColumns{Type: "string", Scope: "resource", Count: 21, MaxCount: 20},
247247
},
248248
},
249+
{
250+
name: "metrics_generator.span_name_sanitization empty (disabled)",
251+
cfg: Config{},
252+
overrides: overrides.Overrides{MetricsGenerator: overrides.MetricsGeneratorOverrides{
253+
SpanNameSanitization: "",
254+
}},
255+
},
256+
{
257+
name: "metrics_generator.span_name_sanitization dry_run",
258+
cfg: Config{},
259+
overrides: overrides.Overrides{MetricsGenerator: overrides.MetricsGeneratorOverrides{
260+
SpanNameSanitization: "dry_run",
261+
}},
262+
},
263+
{
264+
name: "metrics_generator.span_name_sanitization enabled",
265+
cfg: Config{},
266+
overrides: overrides.Overrides{MetricsGenerator: overrides.MetricsGeneratorOverrides{
267+
SpanNameSanitization: "enabled",
268+
}},
269+
},
270+
{
271+
name: "metrics_generator.span_name_sanitization invalid",
272+
cfg: Config{},
273+
overrides: overrides.Overrides{MetricsGenerator: overrides.MetricsGeneratorOverrides{
274+
SpanNameSanitization: "invalid",
275+
}},
276+
expErr: "span_name_sanitization \"invalid\" is not valid, valid values: [ dry_run enabled]",
277+
},
249278
}
250279

251280
for _, tc := range testCases {
@@ -788,6 +817,43 @@ func Test_overridesValidator(t *testing.T) {
788817
},
789818
expErr: `dimension_mapping "combined" produces label "combined" which collides with dimension_mapping "combined"`,
790819
},
820+
{
821+
name: "metrics_generator.span_name_sanitization empty (disabled)",
822+
cfg: Config{},
823+
limits: client.Limits{
824+
MetricsGenerator: client.LimitsMetricsGenerator{
825+
SpanNameSanitization: strPtr(""),
826+
},
827+
},
828+
},
829+
{
830+
name: "metrics_generator.span_name_sanitization dry_run",
831+
cfg: Config{},
832+
limits: client.Limits{
833+
MetricsGenerator: client.LimitsMetricsGenerator{
834+
SpanNameSanitization: strPtr("dry_run"),
835+
},
836+
},
837+
},
838+
{
839+
name: "metrics_generator.span_name_sanitization enabled",
840+
cfg: Config{},
841+
limits: client.Limits{
842+
MetricsGenerator: client.LimitsMetricsGenerator{
843+
SpanNameSanitization: strPtr("enabled"),
844+
},
845+
},
846+
},
847+
{
848+
name: "metrics_generator.span_name_sanitization invalid",
849+
cfg: Config{},
850+
limits: client.Limits{
851+
MetricsGenerator: client.LimitsMetricsGenerator{
852+
SpanNameSanitization: strPtr("invalid"),
853+
},
854+
},
855+
expErr: "span_name_sanitization \"invalid\" is not valid, valid values: [ dry_run enabled]",
856+
},
791857
}
792858

793859
for _, tc := range testCases {

docs/sources/tempo/configuration/_index.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,6 +1988,14 @@ overrides:
19881988
# receiver must be configured to ingest native histograms.
19891989
[generate_native_histograms: <classic|native|both> | default = classic]
19901990
1991+
# Enables span name sanitization using DRAIN clustering to reduce cardinality.
1992+
# Similar span names are clustered together (e.g., "GET /users/123" becomes "GET /users/<*>").
1993+
# Options:
1994+
# - "" (empty string): Disabled (default)
1995+
# - "dry_run": Produces a demand metric for the sanitized cardinality without applying changes
1996+
# - "enabled": Applies DRAIN clustering to span names
1997+
[span_name_sanitization: <string> | default = ""]
1998+
19911999
# Distributor -> metrics-generator forwarder related overrides
19922000
forwarder:
19932001
# Spans are stored in a queue in the distributor before being sent to the metrics-generators.

docs/sources/tempo/operations/manage-advanced-systems/user-configurable-overrides.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ metrics_generator:
7272
[native_histogram_max_bucket_number: <int> | default = 100]
7373
[native_histogram_bucket_factor: <float> | default = 1.1]
7474
[native_histogram_min_reset_duration: <duration> | default = 15m]
75+
[span_name_sanitization: <string> | default = ""]
7576

7677
processor:
7778

integration/api/overrides_api_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,111 @@ func TestOverridesAPI_DELETE(t *testing.T) {
678678
})
679679
}
680680

681+
func TestOverridesAPI_SpanNameSanitization(t *testing.T) {
682+
util.RunIntegrationTests(t, util.TestHarnessConfig{
683+
ConfigOverlay: configOverrides,
684+
DeploymentMode: util.DeploymentModeSingleBinary,
685+
Backends: util.BackendObjectStorageS3,
686+
}, func(h *util.TempoHarness) {
687+
t.Run("sets span_name_sanitization via POST and verifies GET response", func(t *testing.T) {
688+
apiClient := h.APIClientHTTP("tenant-span-name-sanitization-1")
689+
690+
// Create overrides with span_name_sanitization set to "enabled"
691+
limits := &client.Limits{
692+
MetricsGenerator: client.LimitsMetricsGenerator{
693+
SpanNameSanitization: stringPtr("enabled"),
694+
},
695+
}
696+
setEtag, err := apiClient.SetOverrides(limits, "0")
697+
require.NoError(t, err)
698+
require.NotEmpty(t, setEtag)
699+
700+
// Verify GET response contains the value
701+
returnedLimits, etag, err := apiClient.GetOverrides()
702+
require.NoError(t, err)
703+
require.Equal(t, setEtag, etag)
704+
spanNameSanitization, ok := returnedLimits.GetMetricsGenerator().GetSpanNameSanitization()
705+
require.True(t, ok)
706+
require.Equal(t, "enabled", spanNameSanitization)
707+
})
708+
709+
t.Run("sets span_name_sanitization via PATCH and verifies GET response", func(t *testing.T) {
710+
apiClient := h.APIClientHTTP("tenant-span-name-sanitization-2")
711+
712+
// Create initial config
713+
initialLimits := &client.Limits{
714+
MetricsGenerator: client.LimitsMetricsGenerator{
715+
DisableCollection: boolPtr(true),
716+
},
717+
}
718+
_, err := apiClient.SetOverrides(initialLimits, "0")
719+
require.NoError(t, err)
720+
721+
// PATCH with span_name_sanitization set to "dry_run"
722+
patch := &client.Limits{
723+
MetricsGenerator: client.LimitsMetricsGenerator{
724+
SpanNameSanitization: stringPtr("dry_run"),
725+
},
726+
}
727+
returnedLimits, _, err := apiClient.PatchOverrides(patch)
728+
require.NoError(t, err)
729+
730+
// Verify PATCH response contains the value
731+
spanNameSanitization, ok := returnedLimits.GetMetricsGenerator().GetSpanNameSanitization()
732+
require.True(t, ok)
733+
require.Equal(t, "dry_run", spanNameSanitization)
734+
735+
// Verify GET response also contains the value
736+
getLimits, _, err := apiClient.GetOverrides()
737+
require.NoError(t, err)
738+
spanNameSanitization, ok = getLimits.GetMetricsGenerator().GetSpanNameSanitization()
739+
require.True(t, ok)
740+
require.Equal(t, "dry_run", spanNameSanitization)
741+
})
742+
743+
t.Run("returns 400 for invalid span_name_sanitization value via POST", func(t *testing.T) {
744+
apiClient := h.APIClientHTTP("tenant-span-name-sanitization-invalid-1")
745+
746+
// Try to set invalid span_name_sanitization value
747+
limits := &client.Limits{
748+
MetricsGenerator: client.LimitsMetricsGenerator{
749+
SpanNameSanitization: stringPtr("invalid"),
750+
},
751+
}
752+
_, err := apiClient.SetOverrides(limits, "0")
753+
require.Error(t, err)
754+
require.ErrorContains(t, err, "400")
755+
require.ErrorContains(t, err, "span_name_sanitization")
756+
require.ErrorContains(t, err, "not valid")
757+
})
758+
759+
t.Run("returns 400 for invalid span_name_sanitization value via PATCH", func(t *testing.T) {
760+
apiClient := h.APIClientHTTP("tenant-span-name-sanitization-invalid-2")
761+
762+
// Create initial config
763+
initialLimits := &client.Limits{
764+
MetricsGenerator: client.LimitsMetricsGenerator{
765+
DisableCollection: boolPtr(true),
766+
},
767+
}
768+
_, err := apiClient.SetOverrides(initialLimits, "0")
769+
require.NoError(t, err)
770+
771+
// Try to PATCH with invalid span_name_sanitization value
772+
patch := &client.Limits{
773+
MetricsGenerator: client.LimitsMetricsGenerator{
774+
SpanNameSanitization: stringPtr("invalid"),
775+
},
776+
}
777+
_, _, err = apiClient.PatchOverrides(patch)
778+
require.Error(t, err)
779+
require.ErrorContains(t, err, "400")
780+
require.ErrorContains(t, err, "span_name_sanitization")
781+
require.ErrorContains(t, err, "not valid")
782+
})
783+
})
784+
}
785+
681786
// Helper functions for overrides API tests
682787

683788
func printLimits(limits *client.Limits, version string) {
@@ -710,3 +815,7 @@ func keys(m map[string]struct{}) []string {
710815
}
711816
return keys
712817
}
818+
819+
// stringPtr returns a pointer to a copy of s. It is used to populate optional
// (pointer-typed) string fields in test payloads.
func stringPtr(s string) *string {
	value := s
	return &value
}

modules/overrides/user_configurable_overrides.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,13 @@ func (o *userConfigurableOverridesManager) MetricsGeneratorTraceIDLabelName(user
261261
return o.Interface.MetricsGeneratorTraceIDLabelName(userID)
262262
}
263263

264+
func (o *userConfigurableOverridesManager) MetricsGeneratorSpanNameSanitization(userID string) string {
265+
if spanNameSanitization, ok := o.getTenantLimits(userID).GetMetricsGenerator().GetSpanNameSanitization(); ok {
266+
return spanNameSanitization
267+
}
268+
return o.Interface.MetricsGeneratorSpanNameSanitization(userID)
269+
}
270+
264271
func (o *userConfigurableOverridesManager) MetricsGeneratorGenerateNativeHistograms(userID string) histograms.HistogramMethod {
265272
if method, ok := o.getTenantLimits(userID).GetMetricsGenerator().GetGenerateNativeHistograms(); ok {
266273
return method

modules/overrides/user_configurable_overrides_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,49 @@ func TestUserConfigOverridesManager_allFields(t *testing.T) {
209209
assert.Equal(t, "custom_key", mgr.MetricsGeneratorProcessorSpanMetricsSpanMultiplierKey(tenant1))
210210
}
211211

212+
func TestUserConfigOverridesManager_MetricsGeneratorSpanNameSanitization(t *testing.T) {
213+
defaultLimits := Overrides{
214+
MetricsGenerator: MetricsGeneratorOverrides{
215+
SpanNameSanitization: "disabled",
216+
},
217+
}
218+
_, mgr, cleanup := localUserConfigOverrides(t, defaultLimits, nil)
219+
defer cleanup()
220+
221+
// Test fallback behavior - tenant without override should return default value
222+
assert.Equal(t, "disabled", mgr.MetricsGeneratorSpanNameSanitization(tenant1))
223+
assert.Equal(t, "disabled", mgr.MetricsGeneratorSpanNameSanitization(tenant2))
224+
225+
// Set user-configurable override for tenant1
226+
mgr.tenantLimits[tenant1] = &userconfigurableoverrides.Limits{
227+
MetricsGenerator: userconfigurableoverrides.LimitsMetricsGenerator{
228+
SpanNameSanitization: strPtr("enabled"),
229+
},
230+
}
231+
232+
// Test tenant override behavior - tenant1 should return override value
233+
assert.Equal(t, "enabled", mgr.MetricsGeneratorSpanNameSanitization(tenant1))
234+
235+
// Test fallback behavior - tenant2 without override should still return default value
236+
assert.Equal(t, "disabled", mgr.MetricsGeneratorSpanNameSanitization(tenant2))
237+
238+
// Update override for tenant1
239+
mgr.tenantLimits[tenant1] = &userconfigurableoverrides.Limits{
240+
MetricsGenerator: userconfigurableoverrides.LimitsMetricsGenerator{
241+
SpanNameSanitization: strPtr("strict"),
242+
},
243+
}
244+
245+
// Test updated override value
246+
assert.Equal(t, "strict", mgr.MetricsGeneratorSpanNameSanitization(tenant1))
247+
248+
// Remove override for tenant1
249+
delete(mgr.tenantLimits, tenant1)
250+
251+
// Test fallback behavior after removal - should return default value again
252+
assert.Equal(t, "disabled", mgr.MetricsGeneratorSpanNameSanitization(tenant1))
253+
}
254+
212255
func TestUserConfigOverridesManager_populateFromBackend(t *testing.T) {
213256
defaultLimits := Overrides{
214257
Forwarders: []string{"my-forwarder"},

modules/overrides/userconfigurable/api/limits.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ func limitsFromOverrides(overrides overrides.Interface, userID string) *client.L
2424
NativeHistogramMaxBucketNumber: uint32Ptr(overrides.MetricsGeneratorNativeHistogramMaxBucketNumber(userID)),
2525
NativeHistogramBucketFactor: floatPtr(overrides.MetricsGeneratorNativeHistogramBucketFactor(userID)),
2626
NativeHistogramMinResetDuration: timePtr(overrides.MetricsGeneratorNativeHistogramMinResetDuration(userID)),
27+
SpanNameSanitization: func() *string {
28+
s := overrides.MetricsGeneratorSpanNameSanitization(userID)
29+
if s == "" {
30+
return nil
31+
}
32+
return &s
33+
}(),
2734
Processor: client.LimitsMetricsGeneratorProcessor{
2835
ServiceGraphs: client.LimitsMetricsGeneratorProcessorServiceGraphs{
2936
Dimensions: strArrPtr(overrides.MetricsGeneratorProcessorServiceGraphsDimensions(userID)),

modules/overrides/userconfigurable/api/limits_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ func Test_limitsFromOverrides(t *testing.T) {
3030
NativeHistogramMaxBucketNumber: 160,
3131
NativeHistogramBucketFactor: 1.2,
3232
NativeHistogramMinResetDuration: 10 * time.Minute,
33+
SpanNameSanitization: "enabled",
3334
Processor: overrides.ProcessorOverrides{
3435
ServiceGraphs: overrides.ServiceGraphsOverrides{
3536
HistogramBuckets: []float64{0.1, 0.2, 0.5},
@@ -91,6 +92,7 @@ func Test_limitsFromOverrides(t *testing.T) {
9192
"native_histogram_max_bucket_number": 160,
9293
"native_histogram_bucket_factor": 1.2,
9394
"native_histogram_min_reset_duration": "10m0s",
95+
"span_name_sanitization": "enabled",
9496
"processor": {
9597
"service_graphs": {
9698
"dimensions": [

0 commit comments

Comments
 (0)