Skip to content

Commit 1679df6

Browse files
committed
Add system-aware database sanitization to avoid issues with span names
Signed-off-by: Israel Blancas <[email protected]>
1 parent 7caf035 commit 1679df6

File tree

17 files changed

+1877
-516
lines changed

17 files changed

+1877
-516
lines changed

.chloggen/44229.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'bug_fix'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
7+
component: processor/redaction
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Improve database sanitization with system-aware obfuscation, span name sanitization, and URL path parameter redaction.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [44229]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
- Database sanitization now validates the span kind (CLIENT/SERVER/INTERNAL) and requires a `db.system.name` or `db.system` attribute for traces and metrics
20+
- Implemented span name obfuscation for database operations based on db.system
21+
- Added URL path parameter sanitization for span names with configurable pattern matching
22+
- Improved query validation in the database sanitizers
23+
- Fixed an issue where enabling multiple sanitizers could produce spans named `...`; such span names can no longer be generated
24+
- If span name sanitization fails, the original span name is used
25+
26+
# If your change doesn't affect end users or the exported elements of any package,
27+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
28+
# Optional: The change log or logs in which this entry should be included.
29+
# e.g. '[user]' or '[user, api]'
30+
# Include 'user' if the change is relevant to end users.
31+
# Include 'api' if there is a change to a library API.
32+
# Default: '[user]'
33+
change_logs: [user]

processor/redactionprocessor/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,5 @@ The database sanitizer will:
193193
- Preserve query structure while removing sensitive data
194194

195195
This provides an additional layer of protection when collecting telemetry that includes database operations.
196+
197+
**Trace and metric behaviour:** Database sanitization for spans and metric attributes only runs when the telemetry includes a `db.system.name` or `db.system` attribute and the span kind is `CLIENT` or `SERVER`. This prevents non-database spans from being rewritten. For logs, the processor automatically enables an internal sequential fallback: when `db.system` is missing, each enabled sanitizer that handles the attribute is applied in turn, so database attributes in log records can still be sanitized.

processor/redactionprocessor/factory.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,14 @@ func createLogsProcessor(
6464
next consumer.Logs,
6565
) (processor.Logs, error) {
6666
oCfg := cfg.(*Config)
67-
68-
red, err := newRedaction(ctx, oCfg, set.Logger)
67+
logCfg := *oCfg
68+
// Attributes are defined for metrics and traces:
69+
// https://opentelemetry.io/docs/specs/semconv/database/
70+
// For logs, we don't rely on the "db.system.name" attribute to
71+
// do the sanitization.
72+
logCfg.DBSanitizer.AllowFallbackWithoutSystem = true
73+
74+
red, err := newRedaction(ctx, &logCfg, set.Logger)
6975
if err != nil {
7076
return nil, fmt.Errorf("error creating a redaction processor: %w", err)
7177
}

processor/redactionprocessor/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ require (
1616
go.opentelemetry.io/collector/processor v1.46.1-0.20251120204106-2e9c82787618
1717
go.opentelemetry.io/collector/processor/processorhelper v0.140.1-0.20251120204106-2e9c82787618
1818
go.opentelemetry.io/collector/processor/processortest v0.140.1-0.20251120204106-2e9c82787618
19+
go.opentelemetry.io/otel v1.38.0
1920
go.uber.org/goleak v1.3.0
2021
go.uber.org/zap v1.27.0
2122
golang.org/x/crypto v0.45.0
@@ -54,7 +55,6 @@ require (
5455
go.opentelemetry.io/collector/pdata/testdata v0.140.1-0.20251120204106-2e9c82787618 // indirect
5556
go.opentelemetry.io/collector/pipeline v1.46.1-0.20251120204106-2e9c82787618 // indirect
5657
go.opentelemetry.io/collector/processor/xprocessor v0.140.1-0.20251120204106-2e9c82787618 // indirect
57-
go.opentelemetry.io/otel v1.38.0 // indirect
5858
go.opentelemetry.io/otel/metric v1.38.0 // indirect
5959
go.opentelemetry.io/otel/sdk v1.38.0 // indirect
6060
go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect

processor/redactionprocessor/internal/db/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ type DBSanitizerConfig struct {
1111
MongoConfig MongoConfig `mapstructure:"mongo"`
1212
OpenSearchConfig OpenSearchConfig `mapstructure:"opensearch"`
1313
ESConfig ESConfig `mapstructure:"es"`
14+
// AllowFallbackWithoutSystem enables sequential sanitization when `db.system` is missing.
15+
// This is meant for logs contexts and is set internally, not via YAML.
16+
AllowFallbackWithoutSystem bool `mapstructure:"-"`
1417
}
1518

1619
type SQLConfig struct {

processor/redactionprocessor/internal/db/db.go

Lines changed: 129 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,19 @@
44
package db // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor/internal/db"
55

66
import (
7+
"strings"
8+
79
"github.com/DataDog/datadog-agent/pkg/obfuscate"
10+
semconv128 "go.opentelemetry.io/otel/semconv/v1.28.0"
11+
"go.uber.org/zap"
812
)
913

1014
type Obfuscator struct {
11-
obfuscators []databaseObfuscator
12-
processAttributesEnabled bool
15+
obfuscators []databaseObfuscator
16+
processAttributesEnabled bool
17+
logger *zap.Logger
18+
allowFallbackWithoutSystem bool
19+
DBSystem string
1320
}
1421

1522
func createAttributes(attributes []string) map[string]bool {
@@ -20,7 +27,10 @@ func createAttributes(attributes []string) map[string]bool {
2027
return attributesMap
2128
}
2229

23-
func NewObfuscator(cfg DBSanitizerConfig) *Obfuscator {
30+
func NewObfuscator(cfg DBSanitizerConfig, logger *zap.Logger) *Obfuscator {
31+
if logger == nil {
32+
logger = zap.NewNop()
33+
}
2434
o := obfuscate.NewObfuscator(obfuscate.Config{
2535
SQL: obfuscate.SQLConfig{
2636
ReplaceDigits: true,
@@ -49,83 +59,92 @@ func NewObfuscator(cfg DBSanitizerConfig) *Obfuscator {
4959
processAttributesEnabled := false
5060

5161
if cfg.SQLConfig.Enabled {
52-
attributes := createAttributes(cfg.SQLConfig.Attributes)
53-
processAttributesEnabled = processAttributesEnabled || len(attributes) > 0
62+
dbAttrs := newDBAttributes(cfg.SQLConfig.Attributes, []string{
63+
semconv128.DBSystemOtherSQL.Value.AsString(),
64+
semconv128.DBSystemMySQL.Value.AsString(),
65+
semconv128.DBSystemPostgreSQL.Value.AsString(),
66+
semconv128.DBSystemMariaDB.Value.AsString(),
67+
semconv128.DBSystemSqlite.Value.AsString(),
68+
})
69+
processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0
5470
obfuscators = append(obfuscators, &sqlObfuscator{
55-
dbAttributes: dbAttributes{
56-
attributes: attributes,
57-
},
58-
obfuscator: o,
71+
dbAttributes: dbAttrs,
72+
obfuscator: o,
5973
})
6074
}
6175

6276
if cfg.RedisConfig.Enabled {
63-
attributes := createAttributes(cfg.RedisConfig.Attributes)
64-
processAttributesEnabled = processAttributesEnabled || len(attributes) > 0
77+
dbAttrs := newDBAttributes(cfg.RedisConfig.Attributes, []string{
78+
semconv128.DBSystemRedis.Value.AsString(),
79+
})
80+
processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0
6581
obfuscators = append(obfuscators, &redisObfuscator{
66-
dbAttributes: dbAttributes{
67-
attributes: attributes,
68-
},
69-
obfuscator: o,
82+
dbAttributes: dbAttrs,
83+
obfuscator: o,
7084
})
7185
}
7286

7387
if cfg.ValkeyConfig.Enabled {
74-
attributes := createAttributes(cfg.ValkeyConfig.Attributes)
75-
processAttributesEnabled = processAttributesEnabled || len(attributes) > 0
76-
obfuscators = append(obfuscators, &valkeyObfuscator{
77-
dbAttributes: dbAttributes{
78-
attributes: attributes,
79-
},
80-
obfuscator: o,
88+
dbAttrs := newDBAttributes(cfg.ValkeyConfig.Attributes, []string{
89+
"valkey", // Not part of semantic conventions
90+
})
91+
processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0
92+
obfuscators = append(obfuscators, &redisObfuscator{
93+
dbAttributes: dbAttrs,
94+
obfuscator: o,
8195
})
8296
}
8397

8498
if cfg.MemcachedConfig.Enabled {
85-
attributes := createAttributes(cfg.MemcachedConfig.Attributes)
86-
processAttributesEnabled = processAttributesEnabled || len(attributes) > 0
99+
dbAttrs := newDBAttributes(cfg.MemcachedConfig.Attributes, []string{
100+
semconv128.DBSystemMemcached.Value.AsString(),
101+
})
102+
processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0
87103
obfuscators = append(obfuscators, &memcachedObfuscator{
88-
dbAttributes: dbAttributes{
89-
attributes: attributes,
90-
},
91-
obfuscator: o,
104+
dbAttributes: dbAttrs,
105+
obfuscator: o,
92106
})
93107
}
94108

95109
if cfg.MongoConfig.Enabled {
96-
attributes := createAttributes(cfg.MongoConfig.Attributes)
97-
processAttributesEnabled = processAttributesEnabled || len(attributes) > 0
110+
dbAttrs := newDBAttributes(cfg.MongoConfig.Attributes, []string{
111+
semconv128.DBSystemMongoDB.Value.AsString(),
112+
})
113+
processAttributesEnabled = processAttributesEnabled || len(dbAttrs.attributes) > 0
98114
obfuscators = append(obfuscators, &mongoObfuscator{
99-
dbAttributes: dbAttributes{
100-
attributes: attributes,
101-
},
102-
obfuscator: o,
115+
dbAttributes: dbAttrs,
116+
obfuscator: o,
117+
logger: logger,
103118
})
104119
}
105120

106121
if cfg.OpenSearchConfig.Enabled {
107-
attributes := createAttributes([]string{})
122+
dbAttrs := newDBAttributes([]string{}, []string{
123+
"opensearch", // Not part of semantic conventions
124+
})
108125
obfuscators = append(obfuscators, &opensearchObfuscator{
109-
dbAttributes: dbAttributes{
110-
attributes: attributes,
111-
},
112-
obfuscator: o,
126+
dbAttributes: dbAttrs,
127+
obfuscator: o,
128+
logger: logger,
113129
})
114130
}
115131

116132
if cfg.ESConfig.Enabled {
117-
attributes := createAttributes([]string{})
133+
dbAttrs := newDBAttributes([]string{}, []string{
134+
semconv128.DBSystemElasticsearch.Value.AsString(),
135+
})
118136
obfuscators = append(obfuscators, &esObfuscator{
119-
dbAttributes: dbAttributes{
120-
attributes: attributes,
121-
},
122-
obfuscator: o,
137+
dbAttributes: dbAttrs,
138+
obfuscator: o,
139+
logger: logger,
123140
})
124141
}
125142

126143
return &Obfuscator{
127-
obfuscators: obfuscators,
128-
processAttributesEnabled: processAttributesEnabled,
144+
obfuscators: obfuscators,
145+
processAttributesEnabled: processAttributesEnabled,
146+
logger: logger,
147+
allowFallbackWithoutSystem: cfg.AllowFallbackWithoutSystem,
129148
}
130149
}
131150

@@ -144,14 +163,40 @@ func (o *Obfuscator) ObfuscateAttribute(attributeValue, attributeKey string) (st
144163
if !o.HasSpecificAttributes() {
145164
return attributeValue, nil
146165
}
166+
167+
if o.DBSystem == "" {
168+
if o.allowFallbackWithoutSystem {
169+
return o.obfuscateSequentially(attributeValue, attributeKey)
170+
}
171+
return attributeValue, nil
172+
}
173+
147174
for _, obfuscator := range o.obfuscators {
148-
obfuscatedValue, err := obfuscator.ObfuscateAttribute(attributeValue, attributeKey)
175+
if !obfuscator.SupportsSystem(o.DBSystem) {
176+
continue
177+
}
178+
if !obfuscator.ShouldProcessAttribute(attributeKey) {
179+
continue
180+
}
181+
return obfuscator.ObfuscateAttribute(attributeValue, attributeKey)
182+
}
183+
184+
return attributeValue, nil
185+
}
186+
187+
func (o *Obfuscator) obfuscateSequentially(attributeValue, attributeKey string) (string, error) {
188+
result := attributeValue
189+
for _, obfuscator := range o.obfuscators {
190+
if !obfuscator.ShouldProcessAttribute(attributeKey) {
191+
continue
192+
}
193+
obfuscatedValue, err := obfuscator.ObfuscateAttribute(result, attributeKey)
149194
if err != nil {
150195
return attributeValue, err
151196
}
152-
attributeValue = obfuscatedValue
197+
result = obfuscatedValue
153198
}
154-
return attributeValue, nil
199+
return result, nil
155200
}
156201

157202
func (o *Obfuscator) HasSpecificAttributes() bool {
@@ -161,3 +206,38 @@ func (o *Obfuscator) HasSpecificAttributes() bool {
161206
func (o *Obfuscator) HasObfuscators() bool {
162207
return len(o.obfuscators) > 0
163208
}
209+
210+
func (o *Obfuscator) ObfuscateWithSystem(val, dbSystem string) (string, error) {
211+
if !o.HasObfuscators() {
212+
return val, nil
213+
}
214+
if dbSystem == "" {
215+
return val, nil
216+
}
217+
lower := strings.ToLower(dbSystem)
218+
for _, obfuscator := range o.obfuscators {
219+
if !obfuscator.SupportsSystem(lower) {
220+
continue
221+
}
222+
return obfuscator.ObfuscateWithSystem(val, lower)
223+
}
224+
return val, nil
225+
}
226+
227+
func createSystems(systems []string) map[string]bool {
228+
if len(systems) == 0 {
229+
return nil
230+
}
231+
systemsMap := make(map[string]bool, len(systems))
232+
for _, system := range systems {
233+
systemsMap[strings.ToLower(system)] = true
234+
}
235+
return systemsMap
236+
}
237+
238+
func newDBAttributes(attributes, systems []string) dbAttributes {
239+
return dbAttributes{
240+
attributes: createAttributes(attributes),
241+
dbSystems: createSystems(systems),
242+
}
243+
}

0 commit comments

Comments
 (0)