diff --git a/src/semantic-router/pkg/extproc/request_handler.go b/src/semantic-router/pkg/extproc/request_handler.go index 7a762aa7..b4ea97cf 100644 --- a/src/semantic-router/pkg/extproc/request_handler.go +++ b/src/semantic-router/pkg/extproc/request_handler.go @@ -17,6 +17,7 @@ import ( "github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/http" @@ -285,8 +286,8 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques defer span.End() // Store headers for later use - headers := v.RequestHeaders.Headers - for _, h := range headers.Headers { + requestHeaders := v.RequestHeaders.Headers + for _, h := range requestHeaders.Headers { // Prefer Value when available; fall back to RawValue headerValue := h.Value if headerValue == "" && len(h.RawValue) > 0 { @@ -296,7 +297,7 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques ctx.Headers[h.Key] = headerValue // Store request ID if present (case-insensitive) - if strings.ToLower(h.Key) == "x-request-id" { + if strings.EqualFold(h.Key, headers.RequestID) { ctx.RequestID = headerValue } } @@ -800,7 +801,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe if selectedEndpoint != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-gateway-destination-endpoint", + Key: headers.GatewayDestinationEndpoint, RawValue: []byte(selectedEndpoint), }, }) @@ -808,7 +809,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe if actualModel != "" { setHeaders = append(setHeaders, 
&core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-selected-model", + Key: headers.SelectedModel, RawValue: []byte(actualModel), }, }) @@ -889,7 +890,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe if selectedEndpoint != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-gateway-destination-endpoint", + Key: headers.GatewayDestinationEndpoint, RawValue: []byte(selectedEndpoint), }, }) @@ -1042,9 +1043,9 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti (*response).GetRequestBody().GetResponse().GetHeaderMutation().GetSetHeaders() != nil { for _, header := range (*response).GetRequestBody().GetResponse().GetHeaderMutation().GetSetHeaders() { switch header.Header.Key { - case "x-gateway-destination-endpoint": + case headers.GatewayDestinationEndpoint: selectedEndpoint = header.Header.Value - case "x-selected-model": + case headers.SelectedModel: actualModel = header.Header.Value } } @@ -1054,7 +1055,7 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti if selectedEndpoint != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-gateway-destination-endpoint", + Key: headers.GatewayDestinationEndpoint, RawValue: []byte(selectedEndpoint), }, }) @@ -1062,7 +1063,7 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti if actualModel != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-selected-model", + Key: headers.SelectedModel, RawValue: []byte(actualModel), }, }) diff --git a/src/semantic-router/pkg/extproc/response_handler.go b/src/semantic-router/pkg/extproc/response_handler.go index 5b3ec0b3..ce22b281 100644 --- a/src/semantic-router/pkg/extproc/response_handler.go +++ b/src/semantic-router/pkg/extproc/response_handler.go @@ -11,6 +11,7 @@ import ( ext_proc 
"github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "github.com/openai/openai-go" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" ) @@ -60,7 +61,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo if ctx.VSRSelectedCategory != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-vsr-selected-category", + Key: headers.VSRSelectedCategory, RawValue: []byte(ctx.VSRSelectedCategory), }, }) @@ -70,7 +71,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo if ctx.VSRReasoningMode != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-vsr-selected-reasoning", + Key: headers.VSRSelectedReasoning, RawValue: []byte(ctx.VSRReasoningMode), }, }) @@ -80,7 +81,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo if ctx.VSRSelectedModel != "" { setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-vsr-selected-model", + Key: headers.VSRSelectedModel, RawValue: []byte(ctx.VSRSelectedModel), }, }) @@ -93,7 +94,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo } setHeaders = append(setHeaders, &core.HeaderValueOption{ Header: &core.HeaderValue{ - Key: "x-vsr-injected-system-prompt", + Key: headers.VSRInjectedSystemPrompt, RawValue: []byte(injectedValue), }, }) diff --git a/src/semantic-router/pkg/headers/headers.go b/src/semantic-router/pkg/headers/headers.go new file mode 100644 index 00000000..2e9673bc --- /dev/null +++ b/src/semantic-router/pkg/headers/headers.go @@ -0,0 +1,67 @@ +// Package headers provides constants for all custom HTTP headers used in the semantic router. +// 
+// All custom headers follow the "x-" prefix convention for non-standard HTTP headers. +package headers + +// Request Headers +// These headers are read from, or set on, requests handled by the semantic router. +const ( + // RequestID is the unique identifier for tracking a request through the system. + // This header is case-insensitive when read from incoming requests. + RequestID = "x-request-id" + + // GatewayDestinationEndpoint specifies the backend endpoint address selected by the router. + // This header is set by the router to direct Envoy to the appropriate upstream service. + GatewayDestinationEndpoint = "x-gateway-destination-endpoint" + + // SelectedModel indicates the model that was selected by the router for processing. + // This header is set during the routing decision phase. + SelectedModel = "x-selected-model" +) + +// VSR Decision Tracking Headers +// These headers are added to successful responses (HTTP 200-299) to track +// Vector Semantic Router decision-making information for debugging and monitoring. +// Except for VSRCacheHit, they are only added when the request is successful and did not hit the cache. +const ( + // VSRSelectedCategory indicates the category selected by VSR during classification. + // Example values: "math", "business", "biology", "computer_science" + VSRSelectedCategory = "x-vsr-selected-category" + + // VSRSelectedReasoning indicates whether reasoning mode was determined to be used. + // Values: "on" (reasoning enabled) or "off" (reasoning disabled) + VSRSelectedReasoning = "x-vsr-selected-reasoning" + + // VSRSelectedModel indicates the model selected by VSR for processing the request. + // Example values: "deepseek-v31", "phi4", "gpt-4" + VSRSelectedModel = "x-vsr-selected-model" + + // VSRInjectedSystemPrompt indicates whether a system prompt was injected into the request. + // Values: "true" or "false" + VSRInjectedSystemPrompt = "x-vsr-injected-system-prompt" + + // VSRCacheHit indicates that the response was served from cache. 
+ // Value: "true" + VSRCacheHit = "x-vsr-cache-hit" +) + +// Security Headers +// These headers are added to responses when security policies are violated +// or security checks detect potential threats. They replace the unprefixed "x-pii-violation" and "x-jailbreak-*" names. +const ( + // VSRPIIViolation indicates that the request was blocked due to PII policy violation. + // Value: "true" + VSRPIIViolation = "x-vsr-pii-violation" + + // VSRJailbreakBlocked indicates that a jailbreak attempt was detected and blocked. + // Value: "true" + VSRJailbreakBlocked = "x-vsr-jailbreak-blocked" + + // VSRJailbreakType specifies the type of jailbreak attempt that was detected. + // Example values depend on the jailbreak detection classifier. + VSRJailbreakType = "x-vsr-jailbreak-type" + + // VSRJailbreakConfidence indicates the confidence level of the jailbreak detection. + // Value: floating point number formatted as string (e.g., "0.950") + VSRJailbreakConfidence = "x-vsr-jailbreak-confidence" +) diff --git a/src/semantic-router/pkg/headers/headers_test.go b/src/semantic-router/pkg/headers/headers_test.go new file mode 100644 index 00000000..e70122d8 --- /dev/null +++ b/src/semantic-router/pkg/headers/headers_test.go @@ -0,0 +1,37 @@ +package headers + +import ( + "testing" +) + +func TestHeaderConstants(t *testing.T) { + tests := []struct { + name string + header string + expected string + }{ + // Request headers + {"RequestID", RequestID, "x-request-id"}, + {"GatewayDestinationEndpoint", GatewayDestinationEndpoint, "x-gateway-destination-endpoint"}, + {"SelectedModel", SelectedModel, "x-selected-model"}, + // VSR headers + {"VSRSelectedCategory", VSRSelectedCategory, "x-vsr-selected-category"}, + {"VSRSelectedReasoning", VSRSelectedReasoning, "x-vsr-selected-reasoning"}, + {"VSRSelectedModel", VSRSelectedModel, "x-vsr-selected-model"}, + {"VSRInjectedSystemPrompt", VSRInjectedSystemPrompt, "x-vsr-injected-system-prompt"}, + {"VSRCacheHit", VSRCacheHit, "x-vsr-cache-hit"}, + // Security headers + {"VSRPIIViolation", VSRPIIViolation, 
"x-vsr-pii-violation"}, + {"VSRJailbreakBlocked", VSRJailbreakBlocked, "x-vsr-jailbreak-blocked"}, + {"VSRJailbreakType", VSRJailbreakType, "x-vsr-jailbreak-type"}, + {"VSRJailbreakConfidence", VSRJailbreakConfidence, "x-vsr-jailbreak-confidence"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.header != tt.expected { + t.Errorf("Expected %s to be %q, got %q", tt.name, tt.expected, tt.header) + } + }) + } +} diff --git a/src/semantic-router/pkg/utils/http/response.go b/src/semantic-router/pkg/utils/http/response.go index c38f903d..d2f03b4c 100644 --- a/src/semantic-router/pkg/utils/http/response.go +++ b/src/semantic-router/pkg/utils/http/response.go @@ -9,6 +9,7 @@ import ( ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3" "github.com/openai/openai-go" + "github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics" "github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability" ) @@ -101,7 +102,7 @@ func CreatePIIViolationResponse(model string, deniedPII []string, isStreaming bo }, { Header: &core.HeaderValue{ - Key: "x-pii-violation", + Key: headers.VSRPIIViolation, RawValue: []byte("true"), }, }, @@ -202,19 +203,19 @@ func CreateJailbreakViolationResponse(jailbreakType string, confidence float32, }, { Header: &core.HeaderValue{ - Key: "x-jailbreak-blocked", + Key: headers.VSRJailbreakBlocked, RawValue: []byte("true"), }, }, { Header: &core.HeaderValue{ - Key: "x-jailbreak-type", + Key: headers.VSRJailbreakType, RawValue: []byte(jailbreakType), }, }, { Header: &core.HeaderValue{ - Key: "x-jailbreak-confidence", + Key: headers.VSRJailbreakConfidence, RawValue: []byte(fmt.Sprintf("%.3f", confidence)), }, }, @@ -246,7 +247,7 @@ func CreateCacheHitResponse(cachedResponse []byte) *ext_proc.ProcessingResponse }, { Header: &core.HeaderValue{ - Key: 
"x-vsr-cache-hit", + Key: headers.VSRCacheHit, RawValue: []byte("true"), }, },