diff --git a/docs/config-schema.json b/docs/config-schema.json index c74745d51..b54c4610f 100644 --- a/docs/config-schema.json +++ b/docs/config-schema.json @@ -677,6 +677,10 @@ "$ref": "#/$defs/ElasticsearchConfig", "description": "Elasticsearch payload extraction and parsing" }, + "generic": { + "$ref": "#/$defs/HTTPGenericParsingConfig", + "description": "Generic HTTP header and payload extraction with policy-based rules" + }, "graphql": { "$ref": "#/$defs/GraphQLConfig", "description": "GraphQL payload extraction and parsing" @@ -692,6 +696,102 @@ }, "type": "object" }, + "HTTPGenericParsingConfig": { + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable generic HTTP header and payload extraction", + "x-env-var": "OTEL_EBPF_HTTP_GENERIC_PARSING_ENABLED" + }, + "policy": { + "$ref": "#/$defs/HTTPParsingPolicy", + "description": "Policy controls the default behavior and matching strategy" + }, + "rules": { + "items": { + "$ref": "#/$defs/HTTPParsingRule" + }, + "type": "array", + "description": "Rules is an ordered list of include/exclude/obfuscate rules. Rules are evaluated according to Policy.MatchOrder." + } + }, + "type": "object", + "description": "HTTPGenericParsingConfig configures generic HTTP header and payload extraction." + }, + "HTTPParsingMatch": { + "properties": { + "case_sensitive": { + "type": "boolean", + "description": "CaseSensitive controls whether matching is case-sensitive." + } + }, + "type": "object", + "description": "HTTPParsingMatch defines matching criteria for an HTTP parsing rule. Regex patterns are compiled during YAML unmarshaling. When CaseSensitive is false (the default), patterns are automatically wrapped with (?i)." 
+ }, + "HTTPParsingPolicy": { + "properties": { + "default_action": { + "type": "string", + "enum": [ + "exclude", + "include", + "obfuscate" + ], + "description": "DefaultAction specifies what to do when no rule matches: \"include\" or \"exclude\"", + "x-env-var": "OTEL_EBPF_HTTP_PARSING_DEFAULT_ACTION" + }, + "match_order": { + "type": "string", + "enum": [ + "first_match_wins" + ], + "description": "MatchOrder controls how rules are evaluated: \"first_match_wins\"", + "x-env-var": "OTEL_EBPF_HTTP_PARSING_MATCH_ORDER" + }, + "obfuscation_string": { + "type": "string", + "description": "ObfuscationString is the replacement string used when a rule's action is \"obfuscate\"", + "x-env-var": "OTEL_EBPF_HTTP_PARSING_OBFUSCATION_STRING" + } + }, + "type": "object", + "description": "HTTPParsingPolicy defines the default action and match strategy for generic parsing rules." + }, + "HTTPParsingRule": { + "properties": { + "action": { + "type": "string", + "enum": [ + "exclude", + "include", + "obfuscate" + ], + "description": "Action of the rule: \"include\", \"exclude\", or \"obfuscate\"" + }, + "match": { + "$ref": "#/$defs/HTTPParsingMatch", + "description": "Match defines the matching criteria for this rule" + }, + "scope": { + "type": "string", + "enum": [ + "both", + "request", + "response" + ], + "description": "Scope of the rule: \"request\", \"response\", or \"both\"" + }, + "type": { + "type": "string", + "enum": [ + "headers" + ], + "description": "Type specifies what this rule matches against: \"headers\"" + } + }, + "type": "object", + "description": "HTTPParsingRule defines a single include/exclude/obfuscate rule for HTTP header and payload extraction." 
+ }, "HostIDConfig": { "properties": { "override": { diff --git a/pkg/appolly/app/request/span.go b/pkg/appolly/app/request/span.go index 07e09c886..55b2a470b 100644 --- a/pkg/appolly/app/request/span.go +++ b/pkg/appolly/app/request/span.go @@ -359,6 +359,12 @@ type Span struct { AWS *AWS `json:"-"` OpenAI *OpenAI `json:"-"` + // RequestHeaders stores extracted HTTP request headers based on generic parsing rules. + // Keys are canonical header names, values are the (possibly obfuscated) header values. + RequestHeaders map[string]string `json:"-"` + // ResponseHeaders stores extracted HTTP response headers based on generic parsing rules. + ResponseHeaders map[string]string `json:"-"` + // OverrideTraceName is set under some conditions, like spanmetrics reaching the maximum // cardinality for trace names. OverrideTraceName string `json:"-"` diff --git a/pkg/config/payload_extraction.go b/pkg/config/payload_extraction.go index c24bef463..df37c3a91 100644 --- a/pkg/config/payload_extraction.go +++ b/pkg/config/payload_extraction.go @@ -3,12 +3,18 @@ package config // import "go.opentelemetry.io/obi/pkg/config" +import ( + "fmt" + "regexp" + "strings" +) + type PayloadExtraction struct { HTTP HTTPConfig `yaml:"http"` } func (p PayloadExtraction) Enabled() bool { - return p.HTTP.GraphQL.Enabled || p.HTTP.Elasticsearch.Enabled || p.HTTP.AWS.Enabled || p.HTTP.SQLPP.Enabled || p.HTTP.OpenAI.Enabled + return p.HTTP.GraphQL.Enabled || p.HTTP.Elasticsearch.Enabled || p.HTTP.AWS.Enabled || p.HTTP.SQLPP.Enabled || p.HTTP.OpenAI.Enabled || p.HTTP.GenericParsing.Enabled } type HTTPConfig struct { @@ -22,6 +28,8 @@ type HTTPConfig struct { SQLPP SQLPPConfig `yaml:"sqlpp"` // OpenAI payload extraction OpenAI OpenAIConfig `yaml:"openai"` + // Generic HTTP header and payload extraction with policy-based rules + GenericParsing HTTPGenericParsingConfig `yaml:"generic"` } type GraphQLConfig struct { @@ -51,3 +59,148 @@ type OpenAIConfig struct { // Enable OpenAI payload extraction and 
parsing Enabled bool `yaml:"enabled" env:"OTEL_EBPF_HTTP_OPENAI_ENABLED" validate:"boolean"` } + +// HTTPGenericParsingConfig configures generic HTTP header and payload extraction. +type HTTPGenericParsingConfig struct { + // Enable generic HTTP header and payload extraction + Enabled bool `yaml:"enabled" env:"OTEL_EBPF_HTTP_GENERIC_PARSING_ENABLED" validate:"boolean"` + // Policy controls the default behavior and matching strategy + Policy HTTPParsingPolicy `yaml:"policy"` + // Rules is an ordered list of include/exclude/obfuscate rules. + // Rules are evaluated according to Policy.MatchOrder. + Rules []HTTPParsingRule `yaml:"rules"` +} + +// HTTPParsingPolicy defines the default action and match strategy for generic parsing rules. +type HTTPParsingPolicy struct { + // DefaultAction specifies what to do when no rule matches: "include" or "exclude" + DefaultAction HTTPParsingAction `yaml:"default_action" env:"OTEL_EBPF_HTTP_PARSING_DEFAULT_ACTION"` + // MatchOrder controls how rules are evaluated: "first_match_wins" + MatchOrder HTTPParsingMatchOrder `yaml:"match_order" env:"OTEL_EBPF_HTTP_PARSING_MATCH_ORDER"` + // ObfuscationString is the replacement string used when a rule's action is "obfuscate" + ObfuscationString string `yaml:"obfuscation_string" env:"OTEL_EBPF_HTTP_PARSING_OBFUSCATION_STRING"` +} + +// HTTPParsingRule defines a single include/exclude/obfuscate rule for HTTP header and payload extraction. +type HTTPParsingRule struct { + // Action of the rule: "include", "exclude", or "obfuscate" + Action HTTPParsingAction `yaml:"action"` + // Type specifies what this rule matches against: "headers" + Type HTTPParsingRuleType `yaml:"type"` + // Scope of the rule: "request", "response", or "both" + Scope HTTPParsingScope `yaml:"scope"` + // Match defines the matching criteria for this rule + Match HTTPParsingMatch `yaml:"match"` +} + +// HTTPParsingRuleType specifies the target of a parsing rule. 
+type HTTPParsingRuleType string
+
+const (
+	// HTTPParsingRuleTypeHeaders is the only rule type accepted by UnmarshalText.
+	HTTPParsingRuleTypeHeaders HTTPParsingRuleType = "headers"
+)
+
+// UnmarshalText parses a rule type from configuration text. The input is
+// lowercased and trimmed of surrounding whitespace before it is compared
+// against the known types. An unknown value returns an error and leaves the
+// receiver unchanged.
+func (t *HTTPParsingRuleType) UnmarshalText(text []byte) error {
+	str := HTTPParsingRuleType(strings.TrimSpace(strings.ToLower(string(text))))
+	switch str {
+	case HTTPParsingRuleTypeHeaders:
+		*t = str
+		return nil
+	default:
+		return fmt.Errorf("invalid parsing rule type: %q (valid: headers)", string(text))
+	}
+}
+
+// HTTPParsingMatch defines matching criteria for an HTTP parsing rule.
+// Regex patterns are compiled during YAML unmarshaling. When CaseSensitive
+// is false (the default), patterns are automatically wrapped with (?i).
+type HTTPParsingMatch struct {
+	// Regex is a list of compiled regular expressions to match against.
+	// It is excluded from direct YAML decoding; UnmarshalYAML fills it from
+	// the "regex" list of pattern strings.
+	Regex []*regexp.Regexp `yaml:"-"`
+	// CaseSensitive controls whether matching is case-sensitive.
+	CaseSensitive bool `yaml:"case_sensitive"`
+}
+
+// UnmarshalYAML deserializes the match config and compiles regex patterns.
+//
+// Every entry of the "regex" list is compiled with regexp.Compile, prefixed
+// with (?i) unless case_sensitive is true. The first pattern that fails to
+// compile aborts unmarshaling; the returned error quotes the pattern exactly
+// as written in the configuration and wraps the regexp error.
+func (m *HTTPParsingMatch) UnmarshalYAML(unmarshal func(interface{}) error) error {
+	// Use a raw struct to capture the string patterns before compiling.
+	var raw struct {
+		Regex         []string `yaml:"regex"`
+		CaseSensitive bool     `yaml:"case_sensitive"`
+	}
+	if err := unmarshal(&raw); err != nil {
+		return err
+	}
+
+	m.CaseSensitive = raw.CaseSensitive
+	m.Regex = make([]*regexp.Regexp, 0, len(raw.Regex))
+	for _, pattern := range raw.Regex {
+		// Keep the configured pattern untouched so the error message refers
+		// to what the user wrote, not to the flag-prefixed expression.
+		expr := pattern
+		if !m.CaseSensitive {
+			expr = "(?i)" + pattern
+		}
+		re, err := regexp.Compile(expr)
+		if err != nil {
+			return fmt.Errorf("invalid regex %q in parsing match: %w", pattern, err)
+		}
+		m.Regex = append(m.Regex, re)
+	}
+	return nil
+}
+
+// HTTPParsingAction represents the action for a generic parsing rule or default policy.
+type HTTPParsingAction string
+
+const (
+	// HTTPParsingActionInclude reports the matched item with its captured value.
+	HTTPParsingActionInclude HTTPParsingAction = "include"
+	// HTTPParsingActionExclude drops the matched item.
+	HTTPParsingActionExclude HTTPParsingAction = "exclude"
+	// HTTPParsingActionObfuscate reports the matched item with its value
+	// replaced by the policy's obfuscation string.
+	HTTPParsingActionObfuscate HTTPParsingAction = "obfuscate"
+)
+
+// UnmarshalText parses an action from configuration text. The input is
+// lowercased and trimmed of surrounding whitespace before it is compared
+// against the known actions. An unknown value returns an error and leaves the
+// receiver unchanged.
+func (a *HTTPParsingAction) UnmarshalText(text []byte) error {
+	str := HTTPParsingAction(strings.TrimSpace(strings.ToLower(string(text))))
+	switch str {
+	case HTTPParsingActionInclude, HTTPParsingActionExclude, HTTPParsingActionObfuscate:
+		*a = str
+		return nil
+	default:
+		return fmt.Errorf("invalid parsing action: %q (valid: include, exclude, obfuscate)", string(text))
+	}
+}
+
+// HTTPParsingScope selects which side of an HTTP exchange a parsing rule applies to.
+type HTTPParsingScope string
+
+const (
+	// HTTPParsingScopeRequest limits a rule to the request.
+	HTTPParsingScopeRequest HTTPParsingScope = "request"
+	// HTTPParsingScopeResponse limits a rule to the response.
+	HTTPParsingScopeResponse HTTPParsingScope = "response"
+	// HTTPParsingScopeBoth applies a rule to both the request and the response.
+	HTTPParsingScopeBoth HTTPParsingScope = "both"
+)
+
+// UnmarshalText parses a scope from configuration text. The input is
+// lowercased and trimmed of surrounding whitespace before it is compared
+// against the known scopes. An unknown value returns an error and leaves the
+// receiver unchanged.
+func (s *HTTPParsingScope) UnmarshalText(text []byte) error {
+	str := HTTPParsingScope(strings.TrimSpace(strings.ToLower(string(text))))
+	switch str {
+	case HTTPParsingScopeRequest, HTTPParsingScopeResponse, HTTPParsingScopeBoth:
+		*s = str
+		return nil
+	default:
+		// List the scope values here; the action values do not apply.
+		return fmt.Errorf("invalid parsing scope: %q (valid: request, response, both)", string(text))
+	}
+}
+
+// HTTPParsingMatchOrder controls how rules are evaluated.
+type HTTPParsingMatchOrder string + +const ( + HTTPParsingMatchOrderFirstMatchWins HTTPParsingMatchOrder = "first_match_wins" +) + +func (m *HTTPParsingMatchOrder) UnmarshalText(text []byte) error { + str := HTTPParsingMatchOrder(strings.TrimSpace(strings.ToLower(string(text)))) + switch str { + case HTTPParsingMatchOrderFirstMatchWins: + *m = str + return nil + default: + return fmt.Errorf("invalid parsing match order: %q (valid: first_match_wins)", string(text)) + } +} diff --git a/pkg/ebpf/common/http/generic_parsing.go b/pkg/ebpf/common/http/generic_parsing.go new file mode 100644 index 000000000..1b58b123d --- /dev/null +++ b/pkg/ebpf/common/http/generic_parsing.go @@ -0,0 +1,97 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ebpfcommon // import "go.opentelemetry.io/obi/pkg/ebpf/common/http" + +import ( + "net/http" + + "go.opentelemetry.io/obi/pkg/appolly/app/request" + "go.opentelemetry.io/obi/pkg/config" +) + +// GenericParsingSpan applies generic HTTP parsing rules to extract headers into the span. +// Regex patterns in rules are already compiled during YAML deserialization. +// Unlike other parsers, this enriches the span with headers rather than replacing it. 
+//
+// baseSpan must be non-nil. req and resp may be nil; a nil message simply
+// contributes no headers. When at least one header is kept, the non-empty
+// header maps are stored on baseSpan and a copy of the span is returned with
+// true. Otherwise baseSpan is left untouched and returned with false.
+func GenericParsingSpan(
+	baseSpan *request.Span,
+	req *http.Request,
+	resp *http.Response,
+	cfg config.HTTPGenericParsingConfig,
+) (request.Span, bool) {
+	reqHeaders := make(map[string]string)
+	respHeaders := make(map[string]string)
+
+	// Process request headers. The nil check avoids a panic on the field
+	// access when no request is available.
+	if req != nil {
+		for name, values := range req.Header {
+			action := resolveHeaderAction(name, cfg.Rules, cfg.Policy, config.HTTPParsingScopeRequest)
+			applyHeaderAction(action, name, values, reqHeaders, cfg.Policy.ObfuscationString)
+		}
+	}
+
+	// Process response headers, with the same nil handling as above.
+	if resp != nil {
+		for name, values := range resp.Header {
+			action := resolveHeaderAction(name, cfg.Rules, cfg.Policy, config.HTTPParsingScopeResponse)
+			applyHeaderAction(action, name, values, respHeaders, cfg.Policy.ObfuscationString)
+		}
+	}
+
+	if len(reqHeaders) == 0 && len(respHeaders) == 0 {
+		return *baseSpan, false
+	}
+
+	// Only attach non-empty maps so a span without extracted headers on one
+	// side keeps a nil map for that side.
+	if len(reqHeaders) > 0 {
+		baseSpan.RequestHeaders = reqHeaders
+	}
+	if len(respHeaders) > 0 {
+		baseSpan.ResponseHeaders = respHeaders
+	}
+	return *baseSpan, true
+}
+
+// resolveHeaderAction determines what action to take for a given header name
+// by evaluating rules in order (first_match_wins).
+//
+// Rules whose type is not "headers" or whose scope does not cover the given
+// scope are skipped. The action of the first rule with any regex matching
+// headerName is returned; if no rule matches, policy.DefaultAction is returned.
+func resolveHeaderAction(
+	headerName string,
+	rules []config.HTTPParsingRule,
+	policy config.HTTPParsingPolicy,
+	scope config.HTTPParsingScope,
+) config.HTTPParsingAction {
+	for _, rule := range rules {
+		if rule.Type != config.HTTPParsingRuleTypeHeaders {
+			continue
+		}
+		if !scopeApplies(rule.Scope, scope) {
+			continue
+		}
+		// A rule matches as soon as any one of its patterns matches.
+		for _, re := range rule.Match.Regex {
+			if re.MatchString(headerName) {
+				return rule.Action
+			}
+		}
+	}
+	return policy.DefaultAction
+}
+
+// scopeApplies returns true if the rule scope covers the given header source,
+// that is, when the rule scope is "both" or equals headerSource.
+func scopeApplies(ruleScope config.HTTPParsingScope, headerSource config.HTTPParsingScope) bool {
+	return ruleScope == config.HTTPParsingScopeBoth || ruleScope == headerSource
+}
+
+// applyHeaderAction adds the header to the map based on the resolved action.
+func applyHeaderAction( + action config.HTTPParsingAction, + name string, + values []string, + headers map[string]string, + obfuscationString string, +) { + switch action { + case config.HTTPParsingActionInclude: + if len(values) > 0 { + headers[name] = values[0] + } + case config.HTTPParsingActionObfuscate: + headers[name] = obfuscationString + case config.HTTPParsingActionExclude: + // do nothing + } +} diff --git a/pkg/ebpf/common/http/generic_parsing_test.go b/pkg/ebpf/common/http/generic_parsing_test.go new file mode 100644 index 000000000..b8e50552b --- /dev/null +++ b/pkg/ebpf/common/http/generic_parsing_test.go @@ -0,0 +1,329 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ebpfcommon + +import ( + "net/http" + "regexp" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" + + "go.opentelemetry.io/obi/pkg/appolly/app/request" + "go.opentelemetry.io/obi/pkg/config" +) + +func makeReqResp(reqHeaders, respHeaders map[string]string) (*http.Request, *http.Response) { + req := &http.Request{Header: http.Header{}} + for k, v := range reqHeaders { + req.Header.Set(k, v) + } + resp := &http.Response{Header: http.Header{}} + for k, v := range respHeaders { + resp.Header.Set(k, v) + } + return req, resp +} + +// re is a helper to compile a regex in tests. +func re(pattern string) *regexp.Regexp { + return regexp.MustCompile(pattern) +} + +// rei is a helper to compile a case-insensitive regex in tests. 
+func rei(pattern string) *regexp.Regexp { + return regexp.MustCompile("(?i)" + pattern) +} + +func TestGenericParsingSpan_IncludeByDefault(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionInclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"Content-Type": "application/json", "X-Request-Id": "abc123"}, + map[string]string{"X-Response-Id": "resp456"}, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "application/json", span.RequestHeaders["Content-Type"]) + assert.Equal(t, "abc123", span.RequestHeaders["X-Request-Id"]) + assert.Equal(t, "resp456", span.ResponseHeaders["X-Response-Id"]) +} + +func TestGenericParsingSpan_ExcludeByDefault(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"Content-Type": "application/json"}, + map[string]string{"X-Response-Id": "resp456"}, + ) + + _, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + assert.False(t, ok) +} + +func TestGenericParsingSpan_IncludeRule(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionInclude, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeBoth, + Match: config.HTTPParsingMatch{ + Regex: 
[]*regexp.Regexp{re("^X-Request-Id$")}, + }, + }, + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"Content-Type": "application/json", "X-Request-Id": "abc123"}, + map[string]string{"X-Response-Id": "resp456"}, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "abc123", span.RequestHeaders["X-Request-Id"]) + _, hasContentType := span.RequestHeaders["Content-Type"] + assert.False(t, hasContentType) + assert.Nil(t, span.ResponseHeaders) +} + +func TestGenericParsingSpan_ObfuscateRule(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "***", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionObfuscate, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeBoth, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{rei("^Authorization$")}, + }, + }, + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"Authorization": "Bearer secret-token", "Content-Type": "text/plain"}, + nil, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "***", span.RequestHeaders["Authorization"]) + _, hasContentType := span.RequestHeaders["Content-Type"] + assert.False(t, hasContentType) +} + +func TestGenericParsingSpan_ScopeRequest(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionInclude, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: 
config.HTTPParsingScopeRequest, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{re("^X-Custom$")}, + }, + }, + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"X-Custom": "req-value"}, + map[string]string{"X-Custom": "resp-value"}, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "req-value", span.RequestHeaders["X-Custom"]) +} + +func TestGenericParsingSpan_ScopeResponse(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionInclude, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeResponse, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{re("^X-Custom$")}, + }, + }, + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"X-Custom": "req-value"}, + map[string]string{"X-Custom": "resp-value"}, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "resp-value", span.ResponseHeaders["X-Custom"]) +} + +func TestGenericParsingSpan_CaseInsensitiveMatch(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionInclude, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeBoth, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{rei("^x-custom$")}, + }, + }, + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := 
makeReqResp( + map[string]string{"X-Custom": "value"}, + nil, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "value", span.RequestHeaders["X-Custom"]) +} + +func TestGenericParsingSpan_FirstMatchWins(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "***", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionObfuscate, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeBoth, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{re("^Authorization$")}, + }, + }, + { + Action: config.HTTPParsingActionInclude, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeBoth, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{re(".*")}, + }, + }, + }, + } + baseSpan := &request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"Authorization": "Bearer token", "Content-Type": "application/json"}, + nil, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "***", span.RequestHeaders["Authorization"]) + assert.Equal(t, "application/json", span.RequestHeaders["Content-Type"]) +} + +func TestGenericParsingSpan_MultipleRegexInRule(t *testing.T) { + cfg := config.HTTPGenericParsingConfig{ + Enabled: true, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + Rules: []config.HTTPParsingRule{ + { + Action: config.HTTPParsingActionInclude, + Type: config.HTTPParsingRuleTypeHeaders, + Scope: config.HTTPParsingScopeBoth, + Match: config.HTTPParsingMatch{ + Regex: []*regexp.Regexp{re("^Content-Type$"), re("^X-Request-Id$")}, + }, + }, + }, + } + baseSpan := 
&request.Span{Method: "GET", Path: "/test"} + req, resp := makeReqResp( + map[string]string{"Content-Type": "text/html", "X-Request-Id": "123", "Authorization": "secret"}, + nil, + ) + + span, ok := GenericParsingSpan(baseSpan, req, resp, cfg) + require.True(t, ok) + assert.Equal(t, "text/html", span.RequestHeaders["Content-Type"]) + assert.Equal(t, "123", span.RequestHeaders["X-Request-Id"]) + _, hasAuth := span.RequestHeaders["Authorization"] + assert.False(t, hasAuth) +} + +func TestHTTPParsingMatch_UnmarshalYAML(t *testing.T) { + yamlData := ` +rules: + - action: include + type: headers + scope: both + match: + regex: + - "^Content-Type$" + - "^X-Request-Id$" + case_sensitive: false +` + var cfg config.HTTPGenericParsingConfig + err := yaml.Unmarshal([]byte(yamlData), &cfg) + require.NoError(t, err) + require.Len(t, cfg.Rules, 1) + require.Len(t, cfg.Rules[0].Match.Regex, 2) + // case_sensitive=false means (?i) prefix + assert.True(t, cfg.Rules[0].Match.Regex[0].MatchString("content-type")) + assert.True(t, cfg.Rules[0].Match.Regex[0].MatchString("Content-Type")) +} diff --git a/pkg/ebpf/common/http_transform.go b/pkg/ebpf/common/http_transform.go index 478bdee1d..5c2075662 100644 --- a/pkg/ebpf/common/http_transform.go +++ b/pkg/ebpf/common/http_transform.go @@ -172,6 +172,13 @@ func httpRequestResponseToSpan(parseCtx *EBPFParseContext, event *BPFHTTPInfo, r } } + if parseCtx != nil && parseCtx.payloadExtraction.HTTP.GenericParsing.Enabled { + span, ok := ebpfhttp.GenericParsingSpan(&httpSpan, req, resp, parseCtx.payloadExtraction.HTTP.GenericParsing) + if ok { + httpSpan = span + } + } + return httpSpan } diff --git a/pkg/export/otel/traces_test.go b/pkg/export/otel/traces_test.go index 6f6cb30eb..db1b35a3f 100644 --- a/pkg/export/otel/traces_test.go +++ b/pkg/export/otel/traces_test.go @@ -965,6 +965,76 @@ func TestGenerateTracesAttributes(t *testing.T) { ensureTraceAttrNotExists(t, spanAttrs, semconv.GenAIOperationNameKey) ensureTraceAttrNotExists(t, 
spanAttrs, semconv.GenAIInputMessagesKey) }) + t.Run("test HTTP server span with extracted headers", func(t *testing.T) { + span := request.Span{ + Type: request.EventTypeHTTP, + Method: "GET", + Path: "/api/v1/users", + Route: "/api/v1/users", + Status: 200, + RequestHeaders: map[string]string{ + "Content-Type": "application/json", + "X-Request-Id": "abc-123", + }, + ResponseHeaders: map[string]string{ + "X-Response-Id": "resp-456", + }, + } + + tAttrs := tracesgen.TraceAttributesSelector(&span, map[attr.Name]struct{}{}) + traces := tracesgen.GenerateTracesWithAttributes(cache, &span.Service, []attribute.KeyValue{}, hostID, groupFromSpanAndAttributes(&span, tAttrs), reporterName) + + assert.Equal(t, 1, traces.ResourceSpans().Len()) + spans := traces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() + attrs := spans.At(0).Attributes() + + ensureTraceStrAttr(t, attrs, "http.request.header.content-type", "application/json") + ensureTraceStrAttr(t, attrs, "http.request.header.x-request-id", "abc-123") + ensureTraceStrAttr(t, attrs, "http.response.header.x-response-id", "resp-456") + ensureTraceAttrNotExists(t, attrs, "http.request.header.authorization") + }) + t.Run("test HTTP client span with extracted headers", func(t *testing.T) { + span := request.Span{ + Type: request.EventTypeHTTPClient, + Method: "POST", + Path: "/external/api", + Status: 201, + RequestHeaders: map[string]string{ + "Authorization": "***", + }, + ResponseHeaders: map[string]string{ + "X-Ratelimit-Remaining": "42", + }, + } + + tAttrs := tracesgen.TraceAttributesSelector(&span, map[attr.Name]struct{}{}) + traces := tracesgen.GenerateTracesWithAttributes(cache, &span.Service, []attribute.KeyValue{}, hostID, groupFromSpanAndAttributes(&span, tAttrs), reporterName) + + assert.Equal(t, 1, traces.ResourceSpans().Len()) + spans := traces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() + attrs := spans.At(0).Attributes() + + ensureTraceStrAttr(t, attrs, "http.request.header.authorization", "***") + 
ensureTraceStrAttr(t, attrs, "http.response.header.x-ratelimit-remaining", "42") + }) + t.Run("test HTTP span without headers has no header attributes", func(t *testing.T) { + span := request.Span{ + Type: request.EventTypeHTTP, + Method: "GET", + Path: "/health", + Status: 200, + } + + tAttrs := tracesgen.TraceAttributesSelector(&span, map[attr.Name]struct{}{}) + traces := tracesgen.GenerateTracesWithAttributes(cache, &span.Service, []attribute.KeyValue{}, hostID, groupFromSpanAndAttributes(&span, tAttrs), reporterName) + + spans := traces.ResourceSpans().At(0).ScopeSpans().At(0).Spans() + attrs := spans.At(0).Attributes() + + // No header attributes should be present + ensureTraceAttrNotExists(t, attrs, "http.request.header.content-type") + ensureTraceAttrNotExists(t, attrs, "http.response.header.content-type") + }) } func TestTraceSampling(t *testing.T) { diff --git a/pkg/export/otel/tracesgen/tracesgen.go b/pkg/export/otel/tracesgen/tracesgen.go index 79cd3f2c5..b95f4359a 100644 --- a/pkg/export/otel/tracesgen/tracesgen.go +++ b/pkg/export/otel/tracesgen/tracesgen.go @@ -10,6 +10,7 @@ import ( "fmt" "math" "strconv" + "strings" "time" expirable2 "github.com/hashicorp/golang-lru/v2/expirable" @@ -298,6 +299,22 @@ var ( spanMetricsSkip = attribute.Bool(string(attr.SkipSpanMetrics), true) ) +// httpHeaderAttributes converts extracted HTTP headers to OTel span attributes +// following the semantic convention: http.request.header.<key> and http.response.header.<key> +// where <key> is the lowercase, hyphen-separated header name. +func httpHeaderAttributes(span *request.Span) []attribute.KeyValue { + var attrs []attribute.KeyValue + for name, value := range span.RequestHeaders { + key := "http.request.header." + strings.ToLower(name) + attrs = append(attrs, attribute.String(key, value)) + } + for name, value := range span.ResponseHeaders { + key := "http.response.header." 
+ strings.ToLower(name) + attrs = append(attrs, attribute.String(key, value)) + } + return attrs +} + //nolint:cyclop func TraceAttributesSelector(span *request.Span, optionalAttrs map[attr.Name]struct{}) []attribute.KeyValue { var attrs []attribute.KeyValue @@ -326,6 +343,7 @@ func TraceAttributesSelector(span *request.Span, optionalAttrs map[attr.Name]str attrs = append(attrs, semconv.GraphQLOperationName(span.GraphQL.OperationName)) attrs = append(attrs, request.GraphqlOperationType(span.GraphQL.OperationType)) } + attrs = append(attrs, httpHeaderAttributes(span)...) case request.EventTypeGRPC: attrs = []attribute.KeyValue{ semconv.RPCMethod(span.Path), @@ -466,6 +484,7 @@ func TraceAttributesSelector(span *request.Span, optionalAttrs map[attr.Name]str } } + attrs = append(attrs, httpHeaderAttributes(span)...) case request.EventTypeGRPCClient: attrs = []attribute.KeyValue{ semconv.RPCMethod(span.Path), diff --git a/pkg/obi/config.go b/pkg/obi/config.go index fb3ba29c6..f18867a33 100644 --- a/pkg/obi/config.go +++ b/pkg/obi/config.go @@ -155,6 +155,15 @@ var DefaultConfig = Config{ OpenAI: config.OpenAIConfig{ Enabled: false, }, + GenericParsing: config.HTTPGenericParsingConfig{ + Enabled: false, + Policy: config.HTTPParsingPolicy{ + DefaultAction: config.HTTPParsingActionExclude, + MatchOrder: config.HTTPParsingMatchOrderFirstMatchWins, + ObfuscationString: "*", + }, + Rules: []config.HTTPParsingRule{}, + }, }, }, MaxTransactionTime: 5 * time.Minute,