diff --git a/.chloggen/44229.yaml b/.chloggen/44229.yaml new file mode 100644 index 0000000000000..e3de34ae7f858 --- /dev/null +++ b/.chloggen/44229.yaml @@ -0,0 +1,33 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: 'bug_fix' + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: processor/redaction + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Improve database sanitization with system-aware obfuscation, span name sanitization, and URL path parameter redaction. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [44229] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + - Database sanitization now validates span kind (CLIENT/SERVER/INTERNAL ) and requires db.system.name/db.system attribute for traces/metrics + - Implemented span name obfuscation for database operations based on db.system + - Added URL path parameter sanitization for span names with configurable pattern matching + - Improved query validation database sanitizers + - Fix issue ensuring no spans with `...` name can be generated due to enabling multiple sanitizers + - If something went wrong during span name sanitization, original span name is used + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/processor/redactionprocessor/README.md b/processor/redactionprocessor/README.md index 6e2fa4eedcc5c..7740ce3b4bcea 100644 --- a/processor/redactionprocessor/README.md +++ b/processor/redactionprocessor/README.md @@ -193,3 +193,5 @@ The database sanitizer will: - Preserve query structure while removing sensitive data This provides an additional layer of protection when collecting telemetry that includes database operations. + +**Trace and metric behaviour:** Database sanitization for spans and metric attributes only runs when the telemetry includes a `db.system.name` or `db.system` attribute and the span kind is `CLIENT` or `SERVER`. This prevents non-database spans from being rewritten. Logs automatically enable a sequential fallback internally, so database attributes without `db.system` can still be sanitized when they appear in log records. diff --git a/processor/redactionprocessor/factory.go b/processor/redactionprocessor/factory.go index ccbd02bcc26af..fd316c11d6b38 100644 --- a/processor/redactionprocessor/factory.go +++ b/processor/redactionprocessor/factory.go @@ -64,8 +64,14 @@ func createLogsProcessor( next consumer.Logs, ) (processor.Logs, error) { oCfg := cfg.(*Config) - - red, err := newRedaction(ctx, oCfg, set.Logger) + logCfg := *oCfg + // Attributes are defined for metrics and traces: + // https://opentelemetry.io/docs/specs/semconv/database/ + // For logs, we don't rely on the "db.system.name" attribute to + // do the sanitization. + logCfg.DBSanitizer.AllowFallbackWithoutSystem = true + + red, err := newRedaction(ctx, &logCfg, set.Logger) if err != nil { return nil, fmt.Errorf("error creating a redaction processor: %w", err) } diff --git a/processor/redactionprocessor/go.mod b/processor/redactionprocessor/go.mod index b54bcfc687427..e92e37a6885a9 100644 --- a/processor/redactionprocessor/go.mod +++ b/processor/redactionprocessor/go.mod @@ -16,6 +16,7 @@ require ( go.opentelemetry.io/collector/processor v1.46.1-0.20251120204106-2e9c82787618 go.opentelemetry.io/collector/processor/processorhelper v0.140.1-0.20251120204106-2e9c82787618 go.opentelemetry.io/collector/processor/processortest v0.140.1-0.20251120204106-2e9c82787618 + go.opentelemetry.io/otel v1.38.0 go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.0 golang.org/x/crypto v0.45.0 @@ -54,7 +55,6 @@ require ( go.opentelemetry.io/collector/pdata/testdata v0.140.1-0.20251120204106-2e9c82787618 // indirect go.opentelemetry.io/collector/pipeline v1.46.1-0.20251120204106-2e9c82787618 // indirect go.opentelemetry.io/collector/processor/xprocessor v0.140.1-0.20251120204106-2e9c82787618 // indirect - go.opentelemetry.io/otel v1.38.0 // indirect go.opentelemetry.io/otel/metric v1.38.0 // indirect go.opentelemetry.io/otel/sdk v1.38.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect diff --git a/processor/redactionprocessor/internal/db/config.go b/processor/redactionprocessor/internal/db/config.go index 8bfc285031567..f5b60c44a17ab 100644 --- a/processor/redactionprocessor/internal/db/config.go +++ b/processor/redactionprocessor/internal/db/config.go @@ -11,6 +11,9 @@ type DBSanitizerConfig struct { MongoConfig MongoConfig `mapstructure:"mongo"` OpenSearchConfig OpenSearchConfig `mapstructure:"opensearch"` ESConfig ESConfig `mapstructure:"es"` + // AllowFallbackWithoutSystem enables sequential sanitization when `db.system` is missing. + // This is meant for logs contexts and is set internally, not via YAML. + AllowFallbackWithoutSystem bool `mapstructure:"-"` } type SQLConfig struct { diff --git a/processor/redactionprocessor/internal/db/db.go b/processor/redactionprocessor/internal/db/db.go index 59fff277018bf..851f7d8769959 100644 --- a/processor/redactionprocessor/internal/db/db.go +++ b/processor/redactionprocessor/internal/db/db.go @@ -4,12 +4,19 @@ package db // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/db" import ( + "strings" + "github.com/DataDog/datadog-agent/pkg/obfuscate" + semconv128 "go.opentelemetry.io/otel/semconv/v1.28.0" + "go.uber.org/zap" ) type Obfuscator struct { - obfuscators []databaseObfuscator - processAttributesEnabled bool + obfuscators []databaseObfuscator + processAttributesEnabled bool + logger *zap.Logger + allowFallbackWithoutSystem bool + DBSystem string } func createAttributes(attributes []string) map[string]bool { @@ -20,7 +27,10 @@ func createAttributes(attributes []string) map[string]bool { return attributesMap } -func NewObfuscator(cfg DBSanitizerConfig) *Obfuscator { +func NewObfuscator(cfg DBSanitizerConfig, logger *zap.Logger) *Obfuscator { + if logger == nil { + logger = zap.NewNop() + } o := obfuscate.NewObfuscator(obfuscate.Config{ SQL: obfuscate.SQLConfig{ ReplaceDigits: true, @@ -49,83 +59,92 @@ func NewObfuscator(cfg DBSanitizerConfig) *Obfuscator { processAttributesEnabled := false if cfg.SQLConfig.Enabled { - attributes := createAttributes(cfg.SQLConfig.Attributes) - processAttributesEnabled = processAttributesEnabled || len(attributes) > 0 + dbAttrs := newDBAttributes(cfg.SQLConfig.Attributes, []string{ + semconv128.DBSystemOtherSQL.Value.AsString(), + semconv128.DBSystemMySQL.Value.AsString(), + semconv128.DBSystemPostgreSQL.Value.AsString(), + semconv128.DBSystemMariaDB.Value.AsString(), + semconv128.DBSystemSqlite.Value.AsString(), + }) + processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0 obfuscators = append(obfuscators, &sqlObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttributes: dbAttrs, + obfuscator: o, }) } if cfg.RedisConfig.Enabled { - attributes := createAttributes(cfg.RedisConfig.Attributes) - processAttributesEnabled = processAttributesEnabled || len(attributes) > 0 + dbAttrs := newDBAttributes(cfg.RedisConfig.Attributes, []string{ + semconv128.DBSystemRedis.Value.AsString(), + }) + processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0 obfuscators = append(obfuscators, &redisObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttributes: dbAttrs, + obfuscator: o, }) } if cfg.ValkeyConfig.Enabled { - attributes := createAttributes(cfg.ValkeyConfig.Attributes) - processAttributesEnabled = processAttributesEnabled || len(attributes) > 0 - obfuscators = append(obfuscators, &valkeyObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttrs := newDBAttributes(cfg.ValkeyConfig.Attributes, []string{ + "valkey", // Not part of semantic conventions + }) + processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0 + obfuscators = append(obfuscators, &redisObfuscator{ + dbAttributes: dbAttrs, + obfuscator: o, }) } if cfg.MemcachedConfig.Enabled { - attributes := createAttributes(cfg.MemcachedConfig.Attributes) - processAttributesEnabled = processAttributesEnabled || len(attributes) > 0 + dbAttrs := newDBAttributes(cfg.MemcachedConfig.Attributes, []string{ + semconv128.DBSystemMemcached.Value.AsString(), + }) + processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0 obfuscators = append(obfuscators, &memcachedObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttributes: dbAttrs, + obfuscator: o, }) } if cfg.MongoConfig.Enabled { - attributes := createAttributes(cfg.MongoConfig.Attributes) - processAttributesEnabled = processAttributesEnabled || len(attributes) > 0 + dbAttrs := newDBAttributes(cfg.MongoConfig.Attributes, []string{ + semconv128.DBSystemMongoDB.Value.AsString(), + }) + processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0 obfuscators = append(obfuscators, &mongoObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttributes: dbAttrs, + obfuscator: o, + logger: logger, }) } if cfg.OpenSearchConfig.Enabled { - attributes := createAttributes([]string{}) + dbAttrs := newDBAttributes([]string{}, []string{ + "opensearch", // Not part of semantic conventions + }) obfuscators = append(obfuscators, &opensearchObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttributes: dbAttrs, + obfuscator: o, + logger: logger, }) } if cfg.ESConfig.Enabled { - attributes := createAttributes([]string{}) + dbAttrs := newDBAttributes([]string{}, []string{ + semconv128.DBSystemElasticsearch.Value.AsString(), + }) obfuscators = append(obfuscators, &esObfuscator{ - dbAttributes: dbAttributes{ - attributes: attributes, - }, - obfuscator: o, + dbAttributes: dbAttrs, + obfuscator: o, + logger: logger, }) } return &Obfuscator{ - obfuscators: obfuscators, - processAttributesEnabled: processAttributesEnabled, + obfuscators: obfuscators, + processAttributesEnabled: processAttributesEnabled, + logger: logger, + allowFallbackWithoutSystem: cfg.AllowFallbackWithoutSystem, } } @@ -144,14 +163,40 @@ func (o *Obfuscator) ObfuscateAttribute(attributeValue, attributeKey string) (st if !o.HasSpecificAttributes() { return attributeValue, nil } + + if o.DBSystem == "" { + if o.allowFallbackWithoutSystem { + return o.obfuscateSequentially(attributeValue, attributeKey) + } + return attributeValue, nil + } + for _, obfuscator := range o.obfuscators { - obfuscatedValue, err := obfuscator.ObfuscateAttribute(attributeValue, attributeKey) + if !obfuscator.SupportsSystem(o.DBSystem) { + continue + } + if !obfuscator.ShouldProcessAttribute(attributeKey) { + continue + } + return obfuscator.ObfuscateAttribute(attributeValue, attributeKey) + } + + return attributeValue, nil +} + +func (o *Obfuscator) obfuscateSequentially(attributeValue, attributeKey string) (string, error) { + result := attributeValue + for _, obfuscator := range o.obfuscators { + if !obfuscator.ShouldProcessAttribute(attributeKey) { + continue + } + obfuscatedValue, err := obfuscator.ObfuscateAttribute(result, attributeKey) if err != nil { return attributeValue, err } - attributeValue = obfuscatedValue + result = obfuscatedValue } - return attributeValue, nil + return result, nil } func (o *Obfuscator) HasSpecificAttributes() bool { @@ -161,3 +206,38 @@ func (o *Obfuscator) HasSpecificAttributes() bool { func (o *Obfuscator) HasObfuscators() bool { return len(o.obfuscators) > 0 } + +func (o *Obfuscator) ObfuscateWithSystem(val, dbSystem string) (string, error) { + if !o.HasObfuscators() { + return val, nil + } + if dbSystem == "" { + return val, nil + } + lower := strings.ToLower(dbSystem) + for _, obfuscator := range o.obfuscators { + if !obfuscator.SupportsSystem(lower) { + continue + } + return obfuscator.ObfuscateWithSystem(val, lower) + } + return val, nil +} + +func createSystems(systems []string) map[string]bool { + if len(systems) == 0 { + return nil + } + systemsMap := make(map[string]bool, len(systems)) + for _, system := range systems { + systemsMap[strings.ToLower(system)] = true + } + return systemsMap +} + +func newDBAttributes(attributes, systems []string) dbAttributes { + return dbAttributes{ + attributes: createAttributes(attributes), + dbSystems: createSystems(systems), + } +} diff --git a/processor/redactionprocessor/internal/db/db_test.go b/processor/redactionprocessor/internal/db/db_test.go index 9c83b3714b8b2..12a52c622efc2 100644 --- a/processor/redactionprocessor/internal/db/db_test.go +++ b/processor/redactionprocessor/internal/db/db_test.go @@ -7,6 +7,8 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/zap/zaptest" ) func TestCreateAttributes(t *testing.T) { @@ -156,7 +158,7 @@ func TestNewObfuscator(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - o := NewObfuscator(tt.config) + o := NewObfuscator(tt.config, zaptest.NewLogger(t)) assert.Len(t, o.obfuscators, tt.expectedObfuscatorCount) assert.Equal(t, tt.expectedProcessSpecific, o.HasSpecificAttributes()) assert.Equal(t, tt.expectedObfuscatorCount > 0, o.HasObfuscators()) @@ -176,7 +178,7 @@ func TestObfuscate(t *testing.T) { }, } - o := NewObfuscator(config) + o := NewObfuscator(config, zaptest.NewLogger(t)) tests := []struct { name string @@ -211,14 +213,105 @@ func TestObfuscate(t *testing.T) { } } +func TestObfuscateWithSystem(t *testing.T) { + config := DBSanitizerConfig{ + SQLConfig: SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + RedisConfig: RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + } + + o := NewObfuscator(config, zaptest.NewLogger(t)) + + t.Run("sql system", func(t *testing.T) { + result, err := o.ObfuscateWithSystem("SELECT * FROM users WHERE id = 1", "mysql") + require.NoError(t, err) + assert.Equal(t, "SELECT * FROM users WHERE id = ?", result) + }) + + t.Run("redis system", func(t *testing.T) { + result, err := o.ObfuscateWithSystem("SET user:1 top-secret", "redis") + require.NoError(t, err) + assert.Equal(t, "SET user:1 ?", result) + }) + + t.Run("missing system leaves value unchanged", func(t *testing.T) { + result, err := o.ObfuscateWithSystem("SELECT * FROM users WHERE id = 1", "") + require.NoError(t, err) + assert.Equal(t, "SELECT * FROM users WHERE id = 1", result) + }) + + t.Run("unknown system leaves unchanged", func(t *testing.T) { + result, err := o.ObfuscateWithSystem("SET key value", "unknown") + require.NoError(t, err) + assert.Equal(t, "SET key value", result) + }) +} + +func TestNewObfuscatorWithNilLogger(t *testing.T) { + config := DBSanitizerConfig{ + SQLConfig: SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + } + o := NewObfuscator(config, nil) + assert.NotNil(t, o) + assert.NotNil(t, o.logger) +} + +func TestObfuscateWithNoObfuscators(t *testing.T) { + o := NewObfuscator(DBSanitizerConfig{}, zaptest.NewLogger(t)) + result, err := o.ObfuscateWithSystem("SELECT * FROM users WHERE id = 1", "mysql") + require.NoError(t, err) + assert.Equal(t, "SELECT * FROM users WHERE id = 1", result) +} + +func TestCreateSystems(t *testing.T) { + tests := []struct { + name string + systems []string + expected map[string]bool + }{ + { + name: "empty systems", + systems: []string{}, + expected: nil, + }, + { + name: "single system", + systems: []string{"MySQL"}, + expected: map[string]bool{"mysql": true}, + }, + { + name: "multiple systems", + systems: []string{"MySQL", "PostgreSQL", "Redis"}, + expected: map[string]bool{"mysql": true, "postgresql": true, "redis": true}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := createSystems(tt.systems) + assert.Equal(t, tt.expected, result) + }) + } +} + func TestObfuscateAttribute(t *testing.T) { tests := []struct { name string config DBSanitizerConfig value string key string + dbSystem string expected string wantErr bool + fallback bool }{ { name: "sql statement", @@ -230,8 +323,24 @@ func TestObfuscateAttribute(t *testing.T) { }, value: "SELECT * FROM users WHERE id = 123", key: "db.statement", + dbSystem: "mysql", expected: "SELECT * FROM users WHERE id = ?", wantErr: false, + fallback: false, + }, + { + name: "sql statement with mariadb system", + config: DBSanitizerConfig{ + SQLConfig: SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + }, + value: "UPDATE accounts SET balance = 10 WHERE id = 7", + key: "db.statement", + dbSystem: "mariadb", + expected: "UPDATE accounts SET balance = ? WHERE id = ?", + wantErr: false, }, { name: "non-db attribute", @@ -243,20 +352,46 @@ func TestObfuscateAttribute(t *testing.T) { }, value: "SELECT * FROM users", key: "other.field", + dbSystem: "mysql", expected: "SELECT * FROM users", wantErr: false, }, { - name: "no specific attributes", + name: "missing db.system leaves attribute unchanged", config: DBSanitizerConfig{ SQLConfig: SQLConfig{ - Enabled: true, + Enabled: true, + Attributes: []string{"db.statement"}, + }, + RedisConfig: RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, }, - value: "SELECT * FROM users", + value: "SET user:123 john", key: "db.statement", - expected: "SELECT * FROM users", + dbSystem: "", + expected: "SET user:123 john", + wantErr: false, + }, + { + name: "missing db.system with fallback obfuscates sequentially", + config: DBSanitizerConfig{ + SQLConfig: SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + RedisConfig: RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + }, + value: "SET user:123 john", + key: "db.statement", + dbSystem: "", + expected: "SET user:? ?", wantErr: false, + fallback: true, }, { name: "no specific attributes", @@ -267,14 +402,70 @@ func TestObfuscateAttribute(t *testing.T) { }, value: "SELECT * FROM users", key: "db.statement", + dbSystem: "mysql", expected: "SELECT * FROM users", wantErr: false, }, + { + name: "redis system selects redis obfuscator", + config: DBSanitizerConfig{ + SQLConfig: SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + RedisConfig: RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + }, + value: "SET user:123 john", + key: "db.statement", + dbSystem: "redis", + expected: "SET user:123 ?", + wantErr: false, + }, + { + name: "valkey system uses valkey obfuscator", + config: DBSanitizerConfig{ + ValkeyConfig: ValkeyConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + }, + value: "SET key value", + key: "db.statement", + dbSystem: "valkey", + expected: "SET key ?", + wantErr: false, + }, + { + name: "mismatched system leaves attribute unchanged", + config: DBSanitizerConfig{ + SQLConfig: SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + RedisConfig: RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + }, + value: "SET user:123 john", + key: "db.statement", + dbSystem: "cassandra", + expected: "SET user:123 john", + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - o := NewObfuscator(tt.config) + cfg := tt.config + if tt.fallback { + cfg.AllowFallbackWithoutSystem = true + } + o := NewObfuscator(cfg, zaptest.NewLogger(t)) + o.DBSystem = tt.dbSystem result, err := o.ObfuscateAttribute(tt.value, tt.key) if tt.wantErr { assert.Error(t, err) diff --git a/processor/redactionprocessor/internal/db/obfuscators.go b/processor/redactionprocessor/internal/db/obfuscators.go index 6b26ce40ee434..a7cdbbd5c0e05 100644 --- a/processor/redactionprocessor/internal/db/obfuscators.go +++ b/processor/redactionprocessor/internal/db/obfuscators.go @@ -3,22 +3,38 @@ package db // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/db" -import "github.com/DataDog/datadog-agent/pkg/obfuscate" +import ( + "encoding/json" + "strings" + + "github.com/DataDog/datadog-agent/pkg/obfuscate" + "go.uber.org/zap" +) type databaseObfuscator interface { Obfuscate(string) (string, error) + ObfuscateWithSystem(string, string) (string, error) ObfuscateAttribute(string, string) (string, error) ShouldProcessAttribute(string) bool + SupportsSystem(string) bool } type dbAttributes struct { attributes map[string]bool + dbSystems map[string]bool } func (d *dbAttributes) ShouldProcessAttribute(attributeKey string) bool { return d.attributes[attributeKey] } +func (d *dbAttributes) SupportsSystem(dbSystem string) bool { + if len(d.dbSystems) == 0 || dbSystem == "" { + return false + } + return d.dbSystems[dbSystem] +} + type sqlObfuscator struct { dbAttributes obfuscator *obfuscate.Obfuscator @@ -41,6 +57,14 @@ func (o *sqlObfuscator) ObfuscateAttribute(attributeValue, attributeKey string) return o.Obfuscate(attributeValue) } +func (o *sqlObfuscator) ObfuscateWithSystem(s, dbSystem string) (string, error) { + obfuscatedQuery, err := o.obfuscator.ObfuscateSQLStringForDBMS(s, dbSystem) + if err != nil { + return s, err + } + return obfuscatedQuery.Query, nil +} + type redisObfuscator struct { dbAttributes obfuscator *obfuscate.Obfuscator @@ -59,22 +83,8 @@ func (o *redisObfuscator) ObfuscateAttribute(attributeValue, attributeKey string return o.Obfuscate(attributeValue) } -type valkeyObfuscator struct { - dbAttributes - obfuscator *obfuscate.Obfuscator -} - -var _ databaseObfuscator = &valkeyObfuscator{} - -func (o *valkeyObfuscator) Obfuscate(s string) (string, error) { - return o.obfuscator.ObfuscateRedisString(s), nil -} - -func (o *valkeyObfuscator) ObfuscateAttribute(attributeValue, attributeKey string) (string, error) { - if !o.ShouldProcessAttribute(attributeKey) { - return attributeValue, nil - } - return o.Obfuscate(attributeValue) +func (o *redisObfuscator) ObfuscateWithSystem(s, _ string) (string, error) { + return o.Obfuscate(s) } type memcachedObfuscator struct { @@ -95,14 +105,23 @@ func (o *memcachedObfuscator) ObfuscateAttribute(attributeValue, attributeKey st return o.Obfuscate(attributeValue) } +func (o *memcachedObfuscator) ObfuscateWithSystem(s, _ string) (string, error) { + return o.Obfuscate(s) +} + type mongoObfuscator struct { dbAttributes obfuscator *obfuscate.Obfuscator + logger *zap.Logger } var _ databaseObfuscator = &mongoObfuscator{} func (o *mongoObfuscator) Obfuscate(s string) (string, error) { + if !isValidJSON(s) { + o.logger.Debug("mongo span name not obfuscated due to invalid JSON input") + return s, nil + } return o.obfuscator.ObfuscateMongoDBString(s), nil } @@ -113,14 +132,23 @@ func (o *mongoObfuscator) ObfuscateAttribute(attributeValue, attributeKey string return o.Obfuscate(attributeValue) } +func (o *mongoObfuscator) ObfuscateWithSystem(s, _ string) (string, error) { + return o.Obfuscate(s) +} + type opensearchObfuscator struct { dbAttributes obfuscator *obfuscate.Obfuscator + logger *zap.Logger } var _ databaseObfuscator = &opensearchObfuscator{} func (o *opensearchObfuscator) Obfuscate(s string) (string, error) { + if !isValidJSON(s) { + o.logger.Debug("opensearch span name not obfuscated due to invalid JSON input") + return s, nil + } return o.obfuscator.ObfuscateOpenSearchString(s), nil } @@ -131,14 +159,23 @@ func (o *opensearchObfuscator) ObfuscateAttribute(attributeValue, attributeKey s return o.Obfuscate(attributeValue) } +func (o *opensearchObfuscator) ObfuscateWithSystem(s, _ string) (string, error) { + return o.Obfuscate(s) +} + type esObfuscator struct { dbAttributes obfuscator *obfuscate.Obfuscator + logger *zap.Logger } var _ databaseObfuscator = &esObfuscator{} func (o *esObfuscator) Obfuscate(s string) (string, error) { + if !isValidJSON(s) { + o.logger.Debug("elasticsearch span name not obfuscated due to invalid JSON input") + return s, nil + } return o.obfuscator.ObfuscateElasticSearchString(s), nil } @@ -148,3 +185,14 @@ func (o *esObfuscator) ObfuscateAttribute(attributeValue, attributeKey string) ( } return o.Obfuscate(attributeValue) } + +func (o *esObfuscator) ObfuscateWithSystem(s, _ string) (string, error) { + return o.Obfuscate(s) +} + +func isValidJSON(value string) bool { + if strings.TrimSpace(value) == "" { + return false + } + return json.Valid([]byte(value)) +} diff --git a/processor/redactionprocessor/internal/db/obfuscators_test.go b/processor/redactionprocessor/internal/db/obfuscators_test.go index 378314e0ce98e..1d81467d83cf7 100644 --- a/processor/redactionprocessor/internal/db/obfuscators_test.go +++ b/processor/redactionprocessor/internal/db/obfuscators_test.go @@ -8,6 +8,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/obfuscate" "github.com/stretchr/testify/assert" + "go.uber.org/zap/zaptest" ) func TestSQLObfuscator(t *testing.T) { @@ -244,6 +245,7 @@ func TestMongoObfuscator(t *testing.T) { Enabled: true, }, }), + logger: zaptest.NewLogger(t), } tests := []struct { @@ -267,6 +269,13 @@ func TestMongoObfuscator(t *testing.T) { attributeKey: "other.field", shouldProcess: false, }, + { + name: "non-json value", + input: "/foo/{bar}", + expected: "/foo/{bar}", + attributeKey: "db.statement", + shouldProcess: true, + }, } for _, tt := range tests { @@ -304,6 +313,7 @@ func TestOpenSearchObfuscator(t *testing.T) { Enabled: true, }, }), + logger: zaptest.NewLogger(t), } tests := []struct { @@ -327,6 +337,13 @@ func TestOpenSearchObfuscator(t *testing.T) { attributeKey: "other.field", shouldProcess: false, }, + { + name: "non-json value", + input: "/foo/bar", + expected: "/foo/bar", + attributeKey: "db.statement", + shouldProcess: true, + }, } for _, tt := range tests { @@ -339,12 +356,15 @@ func TestOpenSearchObfuscator(t *testing.T) { } } -func TestValkeyObfuscator(t *testing.T) { - o := &valkeyObfuscator{ +func TestRedisObfuscatorWithValkeyConfig(t *testing.T) { + o := &redisObfuscator{ dbAttributes: dbAttributes{ attributes: map[string]bool{ "db.statement": true, }, + dbSystems: map[string]bool{ + "valkey": true, + }, }, obfuscator: obfuscate.NewObfuscator(obfuscate.Config{ SQL: obfuscate.SQLConfig{ @@ -424,6 +444,7 @@ func TestElasticsearchObfuscator(t *testing.T) { Enabled: true, }, }), + logger: zaptest.NewLogger(t), } tests := []struct { @@ -447,6 +468,13 @@ func TestElasticsearchObfuscator(t *testing.T) { attributeKey: "other.field", shouldProcess: false, }, + { + name: "non-json value", + input: "/svc/resource", + expected: "/svc/resource", + attributeKey: "db.statement", + shouldProcess: true, + }, } for _, tt := range tests { diff --git a/processor/redactionprocessor/internal/db/spanname.go b/processor/redactionprocessor/internal/db/spanname.go new file mode 100644 index 0000000000000..ce02820c93190 --- /dev/null +++ b/processor/redactionprocessor/internal/db/spanname.go @@ -0,0 +1,56 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package db // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/db" + +import ( + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + semconv128 "go.opentelemetry.io/otel/semconv/v1.28.0" + semconv137 "go.opentelemetry.io/otel/semconv/v1.37.0" +) + +// SanitizeSpanName obfuscates the span name if it represents a database statement. +// Returns the obfuscated name, whether a change was made, and any error encountered. +func SanitizeSpanName(span ptrace.Span, obfuscator *Obfuscator) (string, bool, error) { + if obfuscator == nil || !obfuscator.HasObfuscators() { + return "", false, nil + } + + name := span.Name() + + kind := span.Kind() + if kind != ptrace.SpanKindClient && kind != ptrace.SpanKindServer && kind != ptrace.SpanKindInternal { + return "", false, nil + } + + dbSystem := GetDBSystem(span.Attributes()) + if dbSystem == "" { + return name, false, nil + } + + obfuscated, err := obfuscator.ObfuscateWithSystem(name, dbSystem) + if err != nil { + return "", false, err + } + if obfuscated == name { + return "", false, nil + } + return obfuscated, true, nil +} + +func GetDBSystem(attributes pcommon.Map) string { + if system := getStringAttrLower(attributes, string(semconv137.DBSystemNameKey)); system != "" { + return system + } + return getStringAttrLower(attributes, string(semconv128.DBSystemKey)) +} + +func getStringAttrLower(attributes pcommon.Map, key string) string { + if value, ok := attributes.Get(key); ok && value.Type() == pcommon.ValueTypeStr { + return strings.ToLower(value.Str()) + } + return "" +} diff --git a/processor/redactionprocessor/internal/db/spanname_test.go b/processor/redactionprocessor/internal/db/spanname_test.go new file mode 100644 index 0000000000000..cb3c1ea200137 --- /dev/null +++ b/processor/redactionprocessor/internal/db/spanname_test.go @@ -0,0 +1,44 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package db + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.uber.org/zap/zaptest" +) + +func TestSanitizeSpanNameRequiresDBSystem(t *testing.T) { + obfuscator := NewObfuscator(DBSanitizerConfig{ + SQLConfig: SQLConfig{Enabled: true}, + }, zaptest.NewLogger(t)) + + span := ptrace.NewSpan() + span.SetKind(ptrace.SpanKindClient) + span.SetName("SELECT 1") + + _, ok, err := SanitizeSpanName(span, obfuscator) + require.NoError(t, err) + assert.False(t, ok) +} + +func TestSanitizeSpanNameWithDBSystemName(t *testing.T) { + obfuscator := NewObfuscator(DBSanitizerConfig{ + SQLConfig: SQLConfig{Enabled: true}, + }, zaptest.NewLogger(t)) + + span := ptrace.NewSpan() + span.SetKind(ptrace.SpanKindClient) + span.SetName("SELECT * FROM users WHERE id = 42") + span.Attributes().PutStr("db.system.name", "mysql") + span.Attributes().PutStr("db.statement", "SELECT * FROM users WHERE id = 42") + + result, ok, err := SanitizeSpanName(span, obfuscator) + require.NoError(t, err) + assert.True(t, ok) + assert.Equal(t, "SELECT * FROM users WHERE id = ?", result) +} diff --git a/processor/redactionprocessor/internal/url/sanitizer_test.go b/processor/redactionprocessor/internal/url/sanitizer_test.go new file mode 100644 index 0000000000000..f8e867b0d212c --- /dev/null +++ b/processor/redactionprocessor/internal/url/sanitizer_test.go @@ -0,0 +1,207 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package url + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewURLSanitizer(t *testing.T) { + tests := []struct { + name string + config URLSanitizationConfig + wantOk bool + }{ + { + name: "valid config with attributes", + config: URLSanitizationConfig{ + Enabled: true, + Attributes: []string{"http.url", "http.target"}, + }, + wantOk: true, + }, + { + name: "valid config without attributes", + config: URLSanitizationConfig{ + Enabled: true, + }, + wantOk: true, + }, + { + name: "valid config disabled", + config: URLSanitizationConfig{ + Enabled: false, + }, + wantOk: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sanitizer, err := NewURLSanitizer(tt.config) + if tt.wantOk { + require.NoError(t, err) + assert.NotNil(t, sanitizer) + assert.NotNil(t, sanitizer.classifier) + assert.Len(t, sanitizer.attributes, len(tt.config.Attributes)) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestURLSanitizer_SanitizeURL(t *testing.T) { + config := URLSanitizationConfig{ + Enabled: true, + Attributes: []string{"http.url"}, + } + sanitizer, err := NewURLSanitizer(config) + require.NoError(t, err) + + tests := []struct { + name string + input string + expected string + }{ + { + name: "url with uuid", + input: "/api/users/123e4567-e89b-12d3-a456-426614174000", + expected: "/api/users/*", + }, + { + name: "url with numeric id", + input: "/api/users/12345", + expected: "/api/users/*", + }, + { + name: "url with multiple ids", + input: "/api/users/123/posts/456", + expected: "/api/users/*/posts/*", + }, + { + name: "simple path", + input: "/api/users", + expected: "/api/users", + }, + { + name: "url with query params", + input: "/api/users?id=123&name=john", + expected: "/api/users", + }, + { + name: "empty url", + input: "", + expected: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := sanitizer.SanitizeURL(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestURLSanitizer_SanitizeAttributeURL(t *testing.T) { + config := URLSanitizationConfig{ + Enabled: true, + Attributes: []string{"http.url", "http.target"}, + } + sanitizer, err := NewURLSanitizer(config) + require.NoError(t, err) + + tests := []struct { + name string + url string + attributeKey string + shouldProcess bool + }{ + { + name: "configured attribute http.url", + url: "/api/users/123", + attributeKey: "http.url", + shouldProcess: true, + }, + { + name: "configured attribute http.target", + url: "/api/posts/456", + attributeKey: "http.target", + shouldProcess: true, + }, + { + name: "non-configured attribute", + url: "/api/users/123", + attributeKey: "http.method", + shouldProcess: false, + }, + { + name: "empty url with configured attribute", + url: "", + attributeKey: "http.url", + shouldProcess: true, + }, + { + name: "empty url with non-configured attribute", + url: "", + attributeKey: "http.method", + shouldProcess: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := sanitizer.SanitizeAttributeURL(tt.url, tt.attributeKey) + + if tt.url == "" { + assert.Empty(t, result, "empty URL should remain empty") + return + } + + if tt.shouldProcess { + assert.NotNil(t, result) + } else { + assert.Equal(t, tt.url, result, "URL should not be modified for non-configured attributes") + } + }) + } +} + +func TestURLSanitizer_EmptyAttributes(t *testing.T) { + config := URLSanitizationConfig{ + Enabled: true, + Attributes: []string{}, + } + sanitizer, err := NewURLSanitizer(config) + require.NoError(t, err) + + tests := []struct { + name string + url string + attributeKey string + }{ + { + name: "no configured attributes - http.url", + url: "/api/users/123", + attributeKey: "http.url", + }, + { + name: "no configured attributes - http.target", + url: "/api/posts/456", + attributeKey: "http.target", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := sanitizer.SanitizeAttributeURL(tt.url, tt.attributeKey) + // When no attributes are configured, URLs should not be sanitized + assert.Equal(t, tt.url, result) + }) + } +} diff --git a/processor/redactionprocessor/internal/url/spanname.go b/processor/redactionprocessor/internal/url/spanname.go new file mode 100644 index 0000000000000..dde4aab735125 --- /dev/null +++ b/processor/redactionprocessor/internal/url/spanname.go @@ -0,0 +1,75 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package url // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/url" + +import ( + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + semconv125 "go.opentelemetry.io/otel/semconv/v1.25.0" + semconv137 "go.opentelemetry.io/otel/semconv/v1.37.0" +) + +// SanitizeSpanName sanitizes the span name if the span looks like an HTTP span. +// It returns the sanitized name and true when a change was made. +func SanitizeSpanName(span ptrace.Span, sanitizer *URLSanitizer) (string, bool) { + if sanitizer == nil { + return "", false + } + + if !shouldSanitizeSpan(span) { + return "", false + } + + name := span.Name() + sanitized := sanitizer.SanitizeURL(name) + if sanitized == name { + return "", false + } + + // This means the full span name was replaced + if sanitized == "*" { + return name, false + } + + return sanitized, true +} + +func shouldSanitizeSpan(span ptrace.Span) bool { + kind := span.Kind() + if kind != ptrace.SpanKindClient && kind != ptrace.SpanKindServer { + return false + } + + attrs := span.Attributes() + spanName := span.Name() + + if !hasHTTPAttributes(attrs) && !strings.Contains(spanName, "/") { + return false + } + + return true +} + +var httpAttributeKeys = []string{ + string(semconv137.HTTPRouteKey), + string(semconv137.HTTPRequestMethodKey), + string(semconv137.HTTPRequestMethodOriginalKey), + string(semconv137.HTTPResponseStatusCodeKey), + string(semconv137.URLFullKey), + string(semconv125.HTTPSchemeKey), + string(semconv125.HTTPTargetKey), + string(semconv125.HTTPMethodKey), + string(semconv125.HTTPURLKey), +} + +func hasHTTPAttributes(attrs pcommon.Map) bool { + for _, key := range httpAttributeKeys { + if _, ok := attrs.Get(key); ok { + return true + } + } + return false +} diff --git a/processor/redactionprocessor/internal/url/spanname_test.go b/processor/redactionprocessor/internal/url/spanname_test.go new file mode 100644 index 0000000000000..5139364de2ee2 --- /dev/null +++ b/processor/redactionprocessor/internal/url/spanname_test.go @@ -0,0 +1,45 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package url + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/ptrace" + semconv128 "go.opentelemetry.io/otel/semconv/v1.28.0" +) + +func TestShouldSanitizeSpanRequiresHTTPSignal(t *testing.T) { + span := ptrace.NewSpan() + span.SetKind(ptrace.SpanKindClient) + span.SetName("operation_without_route") + + assert.False(t, shouldSanitizeSpan(span)) + + span.Attributes().PutStr(string(semconv128.HTTPRouteKey), "/users/{id}") + assert.True(t, shouldSanitizeSpan(span)) +} + +func TestShouldSanitizeSpanRequiresClientOrServer(t *testing.T) { + span := ptrace.NewSpan() + span.SetKind(ptrace.SpanKindInternal) + span.SetName("/users/123") + + assert.False(t, shouldSanitizeSpan(span)) +} + +func TestSanitizeSpanName(t *testing.T) { + sanitizer, err := NewURLSanitizer(URLSanitizationConfig{}) + require.NoError(t, err) + + span := ptrace.NewSpan() + span.SetKind(ptrace.SpanKindClient) + span.SetName("/payments/12345/detail") + + sanitized, ok := SanitizeSpanName(span, sanitizer) + assert.True(t, ok) + assert.Equal(t, "/payments/*/detail", sanitized) +} diff --git a/processor/redactionprocessor/processor.go b/processor/redactionprocessor/processor.go index 65e6c86916e59..de9fbd77951b0 100644 --- a/processor/redactionprocessor/processor.go +++ b/processor/redactionprocessor/processor.go @@ -79,7 +79,7 @@ func newRedaction(ctx context.Context, config *Config, logger *zap.Logger) (*red return nil, fmt.Errorf("failed to create URL sanitizer: %w", err) } } - dbObfuscator := db.NewObfuscator(config.DBSanitizer) + dbObfuscator := db.NewObfuscator(config.DBSanitizer, logger) return &redaction{ allowList: allowList, @@ -143,20 +143,7 @@ func (s *redaction) processResourceSpan(ctx context.Context, rs ptrace.ResourceS // Attributes can also be part of span events s.processSpanEvents(ctx, span.Events()) - if s.shouldRedactSpanName(&span) { - name := span.Name() - if s.urlSanitizer != nil { - name = s.urlSanitizer.SanitizeURL(name) - } - if s.dbObfuscator.HasObfuscators() { - var err error - name, err = s.dbObfuscator.Obfuscate(name) - if err != nil { - s.logger.Error(err.Error()) - } - } - span.SetName(name) - } + s.sanitizeSpanName(span) } } } @@ -330,6 +317,10 @@ func (s *redaction) processAttrs(_ context.Context, attributes pcommon.Map) { // TODO: Use the context for recording metrics var redactedKeys, maskedKeys, allowedKeys, ignoredKeys []string + if s.dbObfuscator != nil { + s.dbObfuscator.DBSystem = db.GetDBSystem(attributes) + } + // Identify attributes to redact and mask in the following sequence // 1. Make a list of attribute keys to redact // 2. Mask any blocked values for the other attributes @@ -508,20 +499,28 @@ func (s *redaction) shouldRedactKey(k string) bool { return false } -func (s *redaction) shouldRedactSpanName(span *ptrace.Span) bool { - if s.urlSanitizer == nil && !s.dbObfuscator.HasObfuscators() { - return false +func (s *redaction) sanitizeSpanName(span ptrace.Span) { + name := span.Name() + + if sanitized, ok := url.SanitizeSpanName(span, s.urlSanitizer); ok { + applySpanName(span, name, sanitized) + return } - spanKind := span.Kind() - if spanKind != ptrace.SpanKindClient && spanKind != ptrace.SpanKindServer { - return false + + if sanitized, ok, err := db.SanitizeSpanName(span, s.dbObfuscator); err != nil { + s.logger.Error("failed to obfuscate span name", zap.Error(err)) + } else if ok { + applySpanName(span, name, sanitized) } +} - spanName := span.Name() - if !strings.Contains(spanName, "/") && !s.dbObfuscator.HasObfuscators() { - return false +func applySpanName(span ptrace.Span, original, candidate string) { + if candidate == "" || candidate == "..." { + return + } + if candidate != original { + span.SetName(candidate) } - return !s.shouldAllowValue(spanName) } const ( diff --git a/processor/redactionprocessor/processor_spanname_test.go b/processor/redactionprocessor/processor_spanname_test.go new file mode 100644 index 0000000000000..1216ab9fc9317 --- /dev/null +++ b/processor/redactionprocessor/processor_spanname_test.go @@ -0,0 +1,793 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package redactionprocessor + +import ( + "maps" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/ptrace" + semconv128 "go.opentelemetry.io/otel/semconv/v1.28.0" + "go.uber.org/zap/zaptest" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/db" + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/url" +) + +const dbSystemKey = string(semconv128.DBSystemKey) + +var ( + mysqlSystem = semconv128.DBSystemMySQL.Value.AsString() + redisSystem = semconv128.DBSystemRedis.Value.AsString() + memcachedSystem = semconv128.DBSystemMemcached.Value.AsString() + mongoSystem = semconv128.DBSystemMongoDB.Value.AsString() + openSearchSystem = semconv128.DBSystemOpensearch.Value.AsString() + elasticsearchSystem = semconv128.DBSystemElasticsearch.Value.AsString() +) + +func TestURLSanitizationSpanName(t *testing.T) { + tests := []struct { + name string + config *Config + spanName string + kind ptrace.SpanKind + expected string + }{ + { + name: "url_enabled_client_span", + config: &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + }, + spanName: "/users/123/profile", + kind: ptrace.SpanKindClient, + expected: "/users/*/profile", + }, + { + name: "url_enabled_non_client_server", + config: &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + }, + spanName: "/users/123/profile", + kind: ptrace.SpanKindInternal, + expected: "/users/123/profile", + }, + { + name: "url_and_db_sanitizers_enabled_without_db_system", + config: &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + }, + }, + }, + spanName: "/payments/123/details", + kind: ptrace.SpanKindClient, + expected: "/payments/*/details", + }, + { + name: "url_and_db_sanitizers_enabled_with_db_system", + config: &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + }, + }, + }, + spanName: "SELECT balance FROM accounts WHERE id = 7", + kind: ptrace.SpanKindClient, + expected: "SELECT balance FROM accounts WHERE id = ?", + }, + { + name: "url_enabled_with_query_string", + config: &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + }, + spanName: "GET /orders/123?sql=SELECT+1", + kind: ptrace.SpanKindClient, + expected: "GET /orders/*", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + attrs := map[string]string{} + if strings.HasPrefix(tc.spanName, "SELECT") { + attrs[dbSystemKey] = mysqlSystem + } + + result := runSpanNameTest(t, tc.config, tc.spanName, tc.kind, attrs) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestURLSanitizationSpanNameWithBlockedValues(t *testing.T) { + t.Run("span name with blocked value should not be sanitized", func(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + BlockedValues: []string{"4[0-9]{12}(?:[0-9]{3})?"}, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + } + + result := runSpanNameTest(t, config, "/users/4111111111111111/profile", ptrace.SpanKindInternal, nil) + assert.Equal(t, "/users/4111111111111111/profile", result) + }) + + t.Run("span name with service identifiers should be sanitized when allowed", func(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + } + + result := runSpanNameTest(t, config, "payments-dev-sql-adapter-queue /api/process/123", ptrace.SpanKindServer, nil) + assert.Equal(t, "payments-dev-sql-adapter-queue /api/process/*", result) + }) + + t.Run("span name with secret pattern should not be sanitized", func(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + BlockedValues: []string{"secret-[0-9]+"}, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + } + + result := runSpanNameTest(t, config, "payments-service /api/secret-123/process", ptrace.SpanKindInternal, nil) + assert.Equal(t, "payments-service /api/secret-123/process", result) + }) + + t.Run("span name without slash should not be sanitized even when URL sanitization enabled", func(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + AllowedValues: []string{".*"}, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + } + + clientResult := runSpanNameTest( + t, config, "us-west2-inventory-dev-catalog-sql-adapter-queue process", ptrace.SpanKindClient, nil, + ) + serverResult := runSpanNameTest( + t, config, "eu-central1-shipping-prod-delivery-processor handle", ptrace.SpanKindServer, nil, + ) + internalResult := runSpanNameTest( + t, config, "cache-service-lookup get-item", ptrace.SpanKindInternal, nil, + ) + + assert.Equal(t, "us-west2-inventory-dev-catalog-sql-adapter-queue process", clientResult) + assert.Equal(t, "eu-central1-shipping-prod-delivery-processor handle", serverResult) + assert.Equal(t, "cache-service-lookup get-item", internalResult) + }) + + t.Run("span name with slash should be sanitized when conditions met", func(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + } + + result := runSpanNameTest(t, config, "GET /api/v1/payments/123", ptrace.SpanKindClient, nil) + assert.Equal(t, "GET /api/v1/payments/*", result) + }) +} + +func TestDBSpanNameUntouchedWhenDBSystemMissing(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + }, + }, + } + + result := runSpanNameTest( + t, config, "SELECT count(*) FROM orders WHERE customer_id = 42", ptrace.SpanKindClient, nil, + ) + assert.Equal(t, "SELECT count(*) FROM orders WHERE customer_id = 42", result) +} + +func TestSpanNameURLAndDBSanitizationCombined(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + }, + }, + } + + result := runSpanNameTest( + t, + config, + "GET /orders/123/lines/456?sql=SELECT+1", + ptrace.SpanKindClient, + map[string]string{dbSystemKey: mysqlSystem}, + ) + assert.Equal(t, "GET /orders/*/lines/*", result) +} + +type spanTestCase struct { + name string + spanName string + kind ptrace.SpanKind + attrs map[string]string + urlSanitized string + sqlSanitized string + redisSanitized string + valkeySanitized string + memcachedSanitized string + mongoSanitized string + openSearchSanitized string + elasticsearchSanitized string +} + +type sanitizerState struct { + url bool + sql bool + redis bool + valkey bool + memcached bool + mongo bool + openSearch bool + elastic bool +} + +func TestSpanNameAllSanitizerCombinations(t *testing.T) { + spanCases := []spanTestCase{ + { + name: "url_span", + spanName: "/orders/123/detail", + kind: ptrace.SpanKindClient, + urlSanitized: "/orders/*/detail", + }, + { + name: "sql_span", + spanName: "SELECT * FROM accounts WHERE id = 42", + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: mysqlSystem}, + sqlSanitized: "SELECT * FROM accounts WHERE id = ?", + }, + { + name: "redis_span", + spanName: "SET user:123 secret", + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: redisSystem}, + redisSanitized: "SET user:123 ?", + }, + { + name: "memcached_span", + spanName: "set key 0 60 5\r\nmysecret\r\n", + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: memcachedSystem}, + memcachedSanitized: "set key 0 60 5", + }, + { + name: "mongo_span", + spanName: `{"find":"users","filter":{"name":"john"}}`, + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: mongoSystem}, + mongoSanitized: `{"find":"?","filter":{"name":"?"}}`, + }, + { + name: "opensearch_span", + spanName: `{"query":{"match":{"title":"secret"}}}`, + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: openSearchSystem}, + openSearchSanitized: `{"query":{"match":{"title":"?"}}}`, + }, + { + name: "elasticsearch_span", + spanName: `{"query":{"match":{"title":"secret"}}}`, + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: elasticsearchSystem}, + elasticsearchSanitized: `{"query":{"match":{"title":"?"}}}`, + }, + { + name: "query_string_span", + spanName: "GET /reports/7?sql=SELECT+1", + kind: ptrace.SpanKindClient, + urlSanitized: "GET /reports/*", + }, + { + name: "valkey_span", + spanName: "GET key:123", + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: "valkey"}, + valkeySanitized: "GET key:123", + }, + } + + for urlBit := 0; urlBit < 2; urlBit++ { + for mask := 0; mask < (1 << 7); mask++ { + state := sanitizerState{ + url: urlBit == 1, + sql: mask&1 != 0, + redis: mask&(1<<1) != 0, + valkey: mask&(1<<2) != 0, + memcached: mask&(1<<3) != 0, + mongo: mask&(1<<4) != 0, + openSearch: mask&(1<<5) != 0, + elastic: mask&(1<<6) != 0, + } + + config := &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: state.url, + }, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: state.sql}, + RedisConfig: db.RedisConfig{Enabled: state.redis}, + ValkeyConfig: db.ValkeyConfig{Enabled: state.valkey}, + MemcachedConfig: db.MemcachedConfig{Enabled: state.memcached}, + MongoConfig: db.MongoConfig{Enabled: state.mongo}, + OpenSearchConfig: db.OpenSearchConfig{Enabled: state.openSearch}, + ESConfig: db.ESConfig{Enabled: state.elastic}, + }, + } + + for _, sc := range spanCases { + t.Run(sc.name, func(t *testing.T) { + result := runSpanNameTest(t, config, sc.spanName, sc.kind, maps.Clone(sc.attrs)) + assert.NotEmpty(t, result, "span name must not be empty") + assert.NotEqual(t, "...", result, "span name must not be ellipsis") + assert.Equalf( + t, + expectedSpanResult(sc, state), + result, + "span=%s url=%t mask=%06b", + sc.name, + state.url, + mask, + ) + }) + } + } + } +} + +func expectedSpanResult(sc spanTestCase, state sanitizerState) string { + result := sc.spanName + if state.url && sc.urlSanitized != "" { + result = sc.urlSanitized + } + + if sc.attrs == nil { + return result + } + + switch sc.attrs[dbSystemKey] { + case "mysql": + if state.sql && sc.sqlSanitized != "" { + return sc.sqlSanitized + } + case "redis": + if state.redis && sc.redisSanitized != "" { + return sc.redisSanitized + } + case "valkey": + if state.valkey && sc.valkeySanitized != "" { + return sc.valkeySanitized + } + case "memcached": + if state.memcached && sc.memcachedSanitized != "" { + return sc.memcachedSanitized + } + case "mongodb": + if state.mongo && sc.mongoSanitized != "" { + return sc.mongoSanitized + } + case "opensearch": + if state.openSearch && sc.openSearchSanitized != "" { + return sc.openSearchSanitized + } + case "elasticsearch": + if state.elastic && sc.elasticsearchSanitized != "" { + return sc.elasticsearchSanitized + } + } + return result +} + +func runSpanNameTest(t *testing.T, config *Config, spanName string, kind ptrace.SpanKind, attrs map[string]string) string { + inBatch := ptrace.NewTraces() + rs := inBatch.ResourceSpans().AppendEmpty() + ils := rs.ScopeSpans().AppendEmpty() + span := ils.Spans().AppendEmpty() + + span.SetName(spanName) + span.SetKind(kind) + for k, v := range attrs { + span.Attributes().PutStr(k, v) + } + + processor, err := newRedaction(t.Context(), config, zaptest.NewLogger(t)) + require.NoError(t, err) + + outTraces, err := processor.processTraces(t.Context(), inBatch) + require.NoError(t, err) + + return outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).Name() +} + +func TestAllDBSystemsSpanName(t *testing.T) { + testCases := []struct { + name string + dbSystem string + dbSystemKey string // defaults to "db.system" if empty + spanName string + configFunc func() *Config + expected string + }{ + { + name: "mysql_with_db.system", + dbSystem: mysqlSystem, + spanName: "SELECT * FROM users WHERE id = 123", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + }, + expected: "SELECT * FROM users WHERE id = ?", + }, + { + name: "mysql_with_db.system.name", + dbSystem: mysqlSystem, + dbSystemKey: "db.system.name", + spanName: "SELECT * FROM payments WHERE account_id = 123", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + }, + expected: "SELECT * FROM payments WHERE account_id = ?", + }, + { + name: "redis", + dbSystem: redisSystem, + spanName: "SET user:12345 my-secret", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + RedisConfig: db.RedisConfig{Enabled: true}, + }, + } + }, + expected: "SET user:12345 ?", + }, + { + name: "postgresql", + dbSystem: semconv128.DBSystemPostgreSQL.Value.AsString(), + spanName: "SELECT * FROM users WHERE id = 42", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + }, + expected: "SELECT * FROM users WHERE id = ?", + }, + { + name: "mariadb", + dbSystem: semconv128.DBSystemMariaDB.Value.AsString(), + spanName: "UPDATE accounts SET balance = 100 WHERE id = 7", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + }, + expected: "UPDATE accounts SET balance = ? WHERE id = ?", + }, + { + name: "sqlite", + dbSystem: semconv128.DBSystemSqlite.Value.AsString(), + spanName: "DELETE FROM sessions WHERE expired < 1234567890", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + }, + expected: "DELETE FROM sessions WHERE expired < ?", + }, + { + name: "mongodb", + dbSystem: mongoSystem, + spanName: `{"find":"users","filter":{"name":"john","age":30}}`, + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + MongoConfig: db.MongoConfig{Enabled: true}, + }, + } + }, + expected: `{"find":"?","filter":{"name":"?","age":"?"}}`, + }, + { + name: "elasticsearch", + dbSystem: elasticsearchSystem, + spanName: `{"query":{"match":{"title":"secret-document"}}}`, + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + ESConfig: db.ESConfig{Enabled: true}, + }, + } + }, + expected: `{"query":{"match":{"title":"?"}}}`, + }, + { + name: "opensearch", + dbSystem: openSearchSystem, + spanName: `{"query":{"term":{"user_id":"12345"}}}`, + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + OpenSearchConfig: db.OpenSearchConfig{Enabled: true}, + }, + } + }, + expected: `{"query":{"term":{"user_id":"?"}}}`, + }, + { + name: "memcached", + dbSystem: memcachedSystem, + spanName: "set mykey 0 3600 11\r\nsecret-data\r\n", + configFunc: func() *Config { + return &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + MemcachedConfig: db.MemcachedConfig{Enabled: true}, + }, + } + }, + expected: "set mykey 0 3600 11", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + key := dbSystemKey + if tc.dbSystemKey != "" { + key = tc.dbSystemKey + } + result := runSpanNameTest( + t, tc.configFunc(), tc.spanName, ptrace.SpanKindClient, + map[string]string{key: tc.dbSystem}, + ) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestValkeySpanNameObfuscation(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + ValkeyConfig: db.ValkeyConfig{ + Enabled: true, + }, + }, + } + + result := runSpanNameTest( + t, config, "SET user:123 secret-value", ptrace.SpanKindClient, + map[string]string{dbSystemKey: "valkey"}, + ) + assert.Equal(t, "SET user:123 ?", result) +} + +func TestSpanKindFiltering(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + + testCases := []struct { + name string + kind ptrace.SpanKind + shouldSanitize bool + }{ + {"client", ptrace.SpanKindClient, true}, + {"server", ptrace.SpanKindServer, true}, + {"internal", ptrace.SpanKindInternal, true}, + {"producer", ptrace.SpanKindProducer, false}, + {"consumer", ptrace.SpanKindConsumer, false}, + {"unspecified", ptrace.SpanKindUnspecified, false}, + } + + spanName := "SELECT * FROM users WHERE id = 42" + expected := "SELECT * FROM users WHERE id = ?" + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := runSpanNameTest( + t, config, spanName, tc.kind, + map[string]string{dbSystemKey: mysqlSystem}, + ) + if tc.shouldSanitize { + assert.Equal(t, expected, result) + } else { + assert.Equal(t, spanName, result) + } + }) + } +} + +func TestSpanNameEdgeCases(t *testing.T) { + config := &Config{ + AllowAllKeys: true, + URLSanitization: url.URLSanitizationConfig{ + Enabled: true, + }, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{Enabled: true}, + }, + } + + testCases := []struct { + name string + spanName string + kind ptrace.SpanKind + attrs map[string]string + expected string + }{ + { + name: "empty_span_name", + spanName: "", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "", + }, + { + name: "only_numbers", + spanName: "12345", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "12345", + }, + { + name: "only_slash", + spanName: "/", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "/", + }, + { + name: "unicode_in_url", + spanName: "/users/测试/profile", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "/users/*/profile", + }, + { + name: "special_chars_in_url", + spanName: "/api/v1/items/@special/data", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "/api/v1/items/*/data", + }, + { + name: "very_long_span_name", + spanName: "/api/" + strings.Repeat("segment/", 100) + "end", + kind: ptrace.SpanKindClient, + attrs: nil, + // URL sanitizer has a limit, so it only processes the first few segments + expected: "/api/segment/segment/segment/segment/segment/segment/segment/segment", + }, + { + name: "whitespace_only", + spanName: " ", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: " ", + }, + { + name: "sql_with_newlines", + spanName: "SELECT *\nFROM users\nWHERE id = 42", + kind: ptrace.SpanKindClient, + attrs: map[string]string{dbSystemKey: mysqlSystem}, + expected: "SELECT *\nFROM users\nWHERE id = ?", + }, + { + name: "url_with_fragment", + spanName: "GET /page/123#section", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "GET /page/*", + }, + { + name: "mixed_slashes", + spanName: "/v1/api\\path/123", + kind: ptrace.SpanKindClient, + attrs: nil, + // Backslash is treated as a separator, so both /api and path are sanitized + expected: "/v1/*/*", + }, + { + name: "no_sanitization_without_db_system", + spanName: "SELECT * FROM users WHERE id = 42", + kind: ptrace.SpanKindClient, + attrs: nil, + expected: "SELECT * FROM users WHERE id = 42", + }, + { + name: "json_like_but_not_db", + spanName: `{"message":"hello"}`, + kind: ptrace.SpanKindClient, + attrs: nil, + expected: `{"message":"hello"}`, + }, + { + name: "span_name_without_slash_sanitized_to_asterisk", + spanName: "okey-dokey-0", + kind: ptrace.SpanKindServer, + attrs: map[string]string{"http.method": "GET"}, + expected: "okey-dokey-0", + }, + { + name: "span_name_without_slash_sanitized_to_asterisk_client", + spanName: "simple-operation-123", + kind: ptrace.SpanKindClient, + attrs: map[string]string{"http.request.method": "POST"}, + expected: "simple-operation-123", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := runSpanNameTest(t, config, tc.spanName, tc.kind, tc.attrs) + assert.Equal(t, tc.expected, result) + }) + } +} diff --git a/processor/redactionprocessor/processor_test.go b/processor/redactionprocessor/processor_test.go index 3537295bdbbbf..b6da5b5aa6371 100644 --- a/processor/redactionprocessor/processor_test.go +++ b/processor/redactionprocessor/processor_test.go @@ -15,6 +15,7 @@ import ( "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" + semconv128 "go.opentelemetry.io/otel/semconv/v1.28.0" "go.uber.org/zap/zaptest" "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/db" @@ -1583,459 +1584,229 @@ func TestURLSanitizationAttributeFiltering(t *testing.T) { assert.Equal(t, "/users/3", otherURL.Str()) } -func TestURLSanitizationSpanName(t *testing.T) { - t.Run("span name should be sanitized when allowed", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, +func TestDBObfuscationUsesDBSystemForAttributes(t *testing.T) { + tc := testConfig{ + config: &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, - }, - } - - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("/users/123/profile") - span.SetKind(ptrace.SpanKindClient) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - assert.Equal(t, "/users/*/profile", outSpan.Name()) - }) - - t.Run("span name should not be sanitized when is not client or server", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, + RedisConfig: db.RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, }, - } + }, + } - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("/users/123/profile") + inBatch := ptrace.NewTraces() + rs := inBatch.ResourceSpans().AppendEmpty() + ils := rs.ScopeSpans().AppendEmpty() + + sqlSpan := ils.Spans().AppendEmpty() + sqlSpan.SetName("SELECT") + sqlSpan.SetKind(ptrace.SpanKindClient) + sqlSpan.Attributes().PutStr(string(semconv128.DBSystemKey), semconv128.DBSystemMySQL.Value.AsString()) + sqlSpan.Attributes().PutStr("db.statement", "SELECT id, email FROM users WHERE id = 42 AND email = 'foo@example.com'") - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) + redisSpan := ils.Spans().AppendEmpty() + redisSpan.SetName("GET") + redisSpan.SetKind(ptrace.SpanKindClient) + redisSpan.Attributes().PutStr(string(semconv128.DBSystemKey), semconv128.DBSystemRedis.Value.AsString()) + redisSpan.Attributes().PutStr("db.statement", "SET user:12345 my-secret") - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - assert.Equal(t, "/users/123/profile", outSpan.Name()) - }) + processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) + require.NoError(t, err) + + outTraces, err := processor.processTraces(t.Context(), inBatch) + require.NoError(t, err) + + spans := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() + + sqlStmt, ok := spans.At(0).Attributes().Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SELECT id, email FROM users WHERE id = ? AND email = ?", sqlStmt.Str()) + + redisStmt, ok := spans.At(1).Attributes().Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SET user:12345 ?", redisStmt.Str()) } -func TestURLSanitizationSpanNameWithBlockedValues(t *testing.T) { - t.Run("span name with blocked value should not be sanitized", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - BlockedValues: []string{"4[0-9]{12}(?:[0-9]{3})?"}, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, +func TestDBObfuscationUsesDBSystemNameForAttributes(t *testing.T) { + tc := testConfig{ + config: &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, }, - } + }, + } - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("/users/4111111111111111/profile") - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - assert.Equal(t, "/users/4111111111111111/profile", outSpan.Name()) - }) - - t.Run("span name with service identifiers should be sanitized when allowed", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, - }, - }, - } + inBatch := ptrace.NewTraces() + rs := inBatch.ResourceSpans().AppendEmpty() + ils := rs.ScopeSpans().AppendEmpty() - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("payments-dev-sql-adapter-queue /api/process/123") - span.SetKind(ptrace.SpanKindServer) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - assert.Equal(t, "payments-dev-sql-adapter-queue /api/process/*", outSpan.Name()) - }) - - t.Run("span name with secret pattern should not be sanitized", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - BlockedValues: []string{"secret-[0-9]+"}, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, - }, - }, - } + sqlSpan := ils.Spans().AppendEmpty() + sqlSpan.SetName("SELECT") + sqlSpan.SetKind(ptrace.SpanKindClient) + sqlSpan.Attributes().PutStr("db.system.name", "postgresql") + sqlSpan.Attributes().PutStr("db.statement", "SELECT email FROM users WHERE email = 'foo@bar.com'") - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("payments-service /api/secret-123/process") - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - assert.Equal(t, "payments-service /api/secret-123/process", outSpan.Name()) - }) - - t.Run("span name without slash should not be sanitized even when URL sanitization enabled", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - AllowedValues: []string{".*"}, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, + processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) + require.NoError(t, err) + + outTraces, err := processor.processTraces(t.Context(), inBatch) + require.NoError(t, err) + + stmt, ok := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).Attributes().Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SELECT email FROM users WHERE email = ?", stmt.Str()) +} + +func TestDBObfuscationAttributesWithoutDBSystemDoesNothing(t *testing.T) { + tc := testConfig{ + config: &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, + }, + RedisConfig: db.RedisConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, }, - } - - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() + }, + } - // Test CLIENT span - clientSpan := ils.Spans().AppendEmpty() - clientSpan.SetName("us-west2-inventory-dev-catalog-sql-adapter-queue process") - clientSpan.SetKind(ptrace.SpanKindClient) + inBatch := ptrace.NewTraces() + rs := inBatch.ResourceSpans().AppendEmpty() + ils := rs.ScopeSpans().AppendEmpty() - // Test SERVER span - serverSpan := ils.Spans().AppendEmpty() - serverSpan.SetName("eu-central1-shipping-prod-delivery-processor handle") - serverSpan.SetKind(ptrace.SpanKindServer) + sqlSpan := ils.Spans().AppendEmpty() + sqlSpan.SetName("SELECT") + sqlSpan.SetKind(ptrace.SpanKindClient) + sqlSpan.Attributes().PutStr("db.statement", "SELECT id FROM accounts WHERE id = 42") - // Test INTERNAL span (should not be processed) - internalSpan := ils.Spans().AppendEmpty() - internalSpan.SetName("cache-service-lookup get-item") - internalSpan.SetKind(ptrace.SpanKindInternal) + redisSpan := ils.Spans().AppendEmpty() + redisSpan.SetName("GET") + redisSpan.SetKind(ptrace.SpanKindClient) + redisSpan.Attributes().PutStr("db.statement", "SET user:999 secret") - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) + processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) + require.NoError(t, err) - spans := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() + outTraces, err := processor.processTraces(t.Context(), inBatch) + require.NoError(t, err) - // CLIENT span should remain unchanged (no slash) - assert.Equal(t, "us-west2-inventory-dev-catalog-sql-adapter-queue process", spans.At(0).Name()) + spans := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() - // SERVER span should remain unchanged (no slash) - assert.Equal(t, "eu-central1-shipping-prod-delivery-processor handle", spans.At(1).Name()) + sqlStmt, ok := spans.At(0).Attributes().Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SELECT id FROM accounts WHERE id = 42", sqlStmt.Str()) - // INTERNAL span should remain unchanged (wrong kind) - assert.Equal(t, "cache-service-lookup get-item", spans.At(2).Name()) - }) + redisStmt, ok := spans.At(1).Attributes().Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SET user:999 secret", redisStmt.Str()) +} - t.Run("span name with slash should be sanitized when conditions met", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, - }, +func TestLogAttributesObfuscationWithoutDBSystem(t *testing.T) { + cfg := &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, - } + }, + } - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() + cfg.DBSanitizer.AllowFallbackWithoutSystem = true - // Test CLIENT span with slash - clientSpan := ils.Spans().AppendEmpty() - clientSpan.SetName("GET /api/v1/payments/123") - clientSpan.SetKind(ptrace.SpanKindClient) + processor, err := newRedaction(t.Context(), cfg, zaptest.NewLogger(t)) + require.NoError(t, err) - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) + logs := plog.NewLogs() + rl := logs.ResourceLogs().AppendEmpty() + ils := rl.ScopeLogs().AppendEmpty() + logRecord := ils.LogRecords().AppendEmpty() + logRecord.Attributes().PutStr("db.statement", "SELECT password FROM users WHERE id = 42") - spans := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() + outLogs, err := processor.processLogs(t.Context(), logs) + require.NoError(t, err) - // CLIENT span should be sanitized (has slash and is CLIENT/SERVER kind) - assert.Equal(t, "GET /api/v1/payments/*", spans.At(0).Name()) - }) + stmt, ok := outLogs.ResourceLogs().At(0).ScopeLogs().At(0).LogRecords().At(0).Attributes().Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SELECT password FROM users WHERE id = ?", stmt.Str()) } -func TestDBObfuscationSpanName(t *testing.T) { - t.Run("span name with SQL query should be obfuscated when SQL config enabled", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - SQLConfig: db.SQLConfig{ - Enabled: true, - }, - }, +func TestMetricAttributesDBObfuscationWithSystem(t *testing.T) { + cfg := &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, - } + }, + } - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("SELECT * FROM users WHERE id = 123") - span.SetKind(ptrace.SpanKindClient) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // SQL query should be obfuscated (numbers replaced) - assert.Contains(t, outSpan.Name(), "SELECT * FROM users WHERE id = ?") - }) - - t.Run("span name with Redis command should be obfuscated when Redis config enabled", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - RedisConfig: db.RedisConfig{ - Enabled: true, - }, - }, - }, - } + processor, err := newRedaction(t.Context(), cfg, zaptest.NewLogger(t)) + require.NoError(t, err) - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("SET user:12345 value") - span.SetKind(ptrace.SpanKindClient) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // Redis SET command should have value removed but key retained - assert.Equal(t, "SET user:12345 ?", outSpan.Name()) - }) - - t.Run("span name without slash should be obfuscated when DB obfuscator enabled", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - SQLConfig: db.SQLConfig{ - Enabled: true, - }, - }, - }, - } + metrics := pmetric.NewMetrics() + rm := metrics.ResourceMetrics().AppendEmpty() + sm := rm.ScopeMetrics().AppendEmpty() + metric := sm.Metrics().AppendEmpty() + metric.SetName("request") + metric.SetEmptyGauge() + dp := metric.Gauge().DataPoints().AppendEmpty() + dp.Attributes().PutStr("db.system", "mysql") + dp.Attributes().PutStr("db.statement", "SELECT id FROM accounts WHERE id = 42") + + outMetrics, err := processor.processMetrics(t.Context(), metrics) + require.NoError(t, err) - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - // No slash in span name - span.SetName("SELECT count(*) FROM orders WHERE status = 'pending'") - span.SetKind(ptrace.SpanKindServer) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // Should be obfuscated even without slash - assert.Contains(t, outSpan.Name(), "SELECT count(*) FROM orders WHERE status = ?") - }) - - t.Run("span name should be processed by both URL sanitizer and DB obfuscator when both enabled", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - URLSanitization: url.URLSanitizationConfig{ - Enabled: true, - }, - DBSanitizer: db.DBSanitizerConfig{ - SQLConfig: db.SQLConfig{ - Enabled: true, - }, - }, - }, - } + outAttrs := outMetrics.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0).Gauge().DataPoints().At(0).Attributes() + stmt, ok := outAttrs.Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SELECT id FROM accounts WHERE id = ?", stmt.Str()) +} - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - // Span name that could benefit from both sanitizers - span.SetName("/api/users/123") - span.SetKind(ptrace.SpanKindClient) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // URL should be sanitized by URL sanitizer - assert.Equal(t, "/api/users/*", outSpan.Name()) - }) - - t.Run("span name should not be obfuscated when span kind is INTERNAL", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - SQLConfig: db.SQLConfig{ - Enabled: true, - }, - }, +func TestMetricAttributesDBObfuscationWithoutSystem(t *testing.T) { + cfg := &Config{ + AllowAllKeys: true, + DBSanitizer: db.DBSanitizerConfig{ + SQLConfig: db.SQLConfig{ + Enabled: true, + Attributes: []string{"db.statement"}, }, - } + }, + } - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("SELECT * FROM users WHERE id = 123") - span.SetKind(ptrace.SpanKindInternal) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // Should NOT be obfuscated because span kind is INTERNAL - assert.Equal(t, "SELECT * FROM users WHERE id = 123", outSpan.Name()) - }) - - t.Run("span name with no enabled DB configs should not be DB obfuscated", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - SQLConfig: db.SQLConfig{ - Enabled: false, - }, - RedisConfig: db.RedisConfig{ - Enabled: false, - }, - }, - }, - } + processor, err := newRedaction(t.Context(), cfg, zaptest.NewLogger(t)) + require.NoError(t, err) - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - // SQL query without slash - span.SetName("SELECT * FROM users WHERE id = 123") - span.SetKind(ptrace.SpanKindClient) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // Should NOT be obfuscated because no DB configs are enabled and no slash for URL sanitization - assert.Equal(t, "SELECT * FROM users WHERE id = 123", outSpan.Name()) - }) - - t.Run("span name with Mongo query should be obfuscated when Mongo config enabled", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - MongoConfig: db.MongoConfig{ - Enabled: true, - }, - }, - }, - } + metrics := pmetric.NewMetrics() + rm := metrics.ResourceMetrics().AppendEmpty() + sm := rm.ScopeMetrics().AppendEmpty() + metric := sm.Metrics().AppendEmpty() + metric.SetName("request") + metric.SetEmptyGauge() + dp := metric.Gauge().DataPoints().AppendEmpty() + dp.Attributes().PutStr("db.statement", "SELECT id FROM accounts WHERE id = 42") - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName(`{"find":"users","filter":{"_id":"507f1f77bcf86cd799439011"}}`) - span.SetKind(ptrace.SpanKindClient) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // Mongo query should be obfuscated (all values replaced with ?) - assert.Contains(t, outSpan.Name(), `"find":"?"`) - assert.Contains(t, outSpan.Name(), `"filter":{"_id":"?"}`) - }) - - t.Run("span name with multiple database types enabled should be processed sequentially", func(t *testing.T) { - tc := testConfig{ - config: &Config{ - AllowAllKeys: true, - DBSanitizer: db.DBSanitizerConfig{ - SQLConfig: db.SQLConfig{ - Enabled: true, - }, - RedisConfig: db.RedisConfig{ - Enabled: true, - }, - }, - }, - } + outMetrics, err := processor.processMetrics(t.Context(), metrics) + require.NoError(t, err) - inBatch := ptrace.NewTraces() - rs := inBatch.ResourceSpans().AppendEmpty() - ils := rs.ScopeSpans().AppendEmpty() - span := ils.Spans().AppendEmpty() - span.SetName("SELECT * FROM cache WHERE key = 'user:123'") - span.SetKind(ptrace.SpanKindServer) - - processor, err := newRedaction(t.Context(), tc.config, zaptest.NewLogger(t)) - require.NoError(t, err) - outTraces, err := processor.processTraces(t.Context(), inBatch) - require.NoError(t, err) - - outSpan := outTraces.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) - // Should be processed by all enabled obfuscators - assert.NotEqual(t, "SELECT * FROM cache WHERE key = 'user:123'", outSpan.Name()) - }) + outAttrs := outMetrics.ResourceMetrics().At(0).ScopeMetrics().At(0).Metrics().At(0).Gauge().DataPoints().At(0).Attributes() + stmt, ok := outAttrs.Get("db.statement") + require.True(t, ok) + assert.Equal(t, "SELECT id FROM accounts WHERE id = 42", stmt.Str()) }