vllm-project · rootfs · Oct 8, 2025 · Oct 8, 2025
@@ -17,6 +17,7 @@ import (
 
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/http"
@@ -285,8 +286,8 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques
 	defer span.End()
 
 	// Store headers for later use
-	headers := v.RequestHeaders.Headers
-	for _, h := range headers.Headers {
+	requestHeaders := v.RequestHeaders.Headers
+	for _, h := range requestHeaders.Headers {
 		// Prefer Value when available; fall back to RawValue
 		headerValue := h.Value
 		if headerValue == "" && len(h.RawValue) > 0 {
@@ -296,7 +297,7 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques
 
 		ctx.Headers[h.Key] = headerValue
 		// Store request ID if present (case-insensitive)
-		if strings.ToLower(h.Key) == "x-request-id" {
+		if strings.ToLower(h.Key) == headers.RequestID {
 			ctx.RequestID = headerValue
 		}
 	}
@@ -800,15 +801,15 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 				if selectedEndpoint != "" {
 					setHeaders = append(setHeaders, &core.HeaderValueOption{
 						Header: &core.HeaderValue{
-							Key:      "x-gateway-destination-endpoint",
+							Key:      headers.GatewayDestinationEndpoint,
 							RawValue: []byte(selectedEndpoint),
 						},
 					})
 				}
 				if actualModel != "" {
 					setHeaders = append(setHeaders, &core.HeaderValueOption{
 						Header: &core.HeaderValue{
-							Key:      "x-selected-model",
+							Key:      headers.SelectedModel,
 							RawValue: []byte(actualModel),
 						},
 					})
@@ -889,7 +890,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
 		if selectedEndpoint != "" {
 			setHeaders = append(setHeaders, &core.HeaderValueOption{
 				Header: &core.HeaderValue{
-					Key:      "x-gateway-destination-endpoint",
+					Key:      headers.GatewayDestinationEndpoint,
 					RawValue: []byte(selectedEndpoint),
 				},
 			})
@@ -1042,9 +1043,9 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti
 		(*response).GetRequestBody().GetResponse().GetHeaderMutation().GetSetHeaders() != nil {
 		for _, header := range (*response).GetRequestBody().GetResponse().GetHeaderMutation().GetSetHeaders() {
 			switch header.Header.Key {
-			case "x-gateway-destination-endpoint":
+			case headers.GatewayDestinationEndpoint:
 				selectedEndpoint = header.Header.Value
-			case "x-selected-model":
+			case headers.SelectedModel:
 				actualModel = header.Header.Value
 			}
 		}
@@ -1054,15 +1055,15 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti
 	if selectedEndpoint != "" {
 		setHeaders = append(setHeaders, &core.HeaderValueOption{
 			Header: &core.HeaderValue{
-				Key:      "x-gateway-destination-endpoint",
+				Key:      headers.GatewayDestinationEndpoint,
 				RawValue: []byte(selectedEndpoint),
 			},
 		})
 	}
 	if actualModel != "" {
 		setHeaders = append(setHeaders, &core.HeaderValueOption{
 			Header: &core.HeaderValue{
-				Key:      "x-selected-model",
+				Key:      headers.SelectedModel,
 				RawValue: []byte(actualModel),
 			},
 		})

@@ -11,6 +11,7 @@ import (
 	ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 
 	"github.com/openai/openai-go"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
 )
@@ -60,7 +61,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
 		if ctx.VSRSelectedCategory != "" {
 			setHeaders = append(setHeaders, &core.HeaderValueOption{
 				Header: &core.HeaderValue{
-					Key:      "x-vsr-selected-category",
+					Key:      headers.VSRSelectedCategory,
 					RawValue: []byte(ctx.VSRSelectedCategory),
 				},
 			})
@@ -70,7 +71,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
 		if ctx.VSRReasoningMode != "" {
 			setHeaders = append(setHeaders, &core.HeaderValueOption{
 				Header: &core.HeaderValue{
-					Key:      "x-vsr-selected-reasoning",
+					Key:      headers.VSRSelectedReasoning,
 					RawValue: []byte(ctx.VSRReasoningMode),
 				},
 			})
@@ -80,7 +81,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
 		if ctx.VSRSelectedModel != "" {
 			setHeaders = append(setHeaders, &core.HeaderValueOption{
 				Header: &core.HeaderValue{
-					Key:      "x-vsr-selected-model",
+					Key:      headers.VSRSelectedModel,
 					RawValue: []byte(ctx.VSRSelectedModel),
 				},
 			})
@@ -93,7 +94,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
 		}
 		setHeaders = append(setHeaders, &core.HeaderValueOption{
 			Header: &core.HeaderValue{
-				Key:      "x-vsr-injected-system-prompt",
+				Key:      headers.VSRInjectedSystemPrompt,
 				RawValue: []byte(injectedValue),
 			},
 		})

@@ -0,0 +1,67 @@
+// Package headers provides constants for all custom HTTP headers used in the semantic router.
+// All custom headers follow the "x-" prefix convention for non-standard HTTP headers.
+package headers
+
+// Request Headers
+// These headers are read from incoming requests or set on them by the router during request processing.
+const (
+	// RequestID is the unique identifier for tracking a request through the system.
+	// The header key is matched case-insensitively when read from incoming requests.
+	RequestID = "x-request-id"
+
+	// GatewayDestinationEndpoint specifies the backend endpoint address selected by the router.
+	// This header is set by the router to direct Envoy to the appropriate upstream service.
+	GatewayDestinationEndpoint = "x-gateway-destination-endpoint"
+
+	// SelectedModel indicates the model that was selected by the router for processing.
+	// This header is set during the routing decision phase.
+	SelectedModel = "x-selected-model"
+)
+
+// VSR Decision Tracking Headers
+// These headers are added to successful responses (HTTP 200-299) to track
+// Vector Semantic Router decision-making information for debugging and monitoring.
+// The selection headers are only added when the request is successful and did not hit
+// the cache; VSRCacheHit is the exception and marks responses served from the cache.
+const (
+	// VSRSelectedCategory indicates the category selected by VSR during classification.
+	// Example values: "math", "business", "biology", "computer_science"
+	VSRSelectedCategory = "x-vsr-selected-category"
+
+	// VSRSelectedReasoning indicates whether reasoning mode was determined to be used.
+	// Values: "on" (reasoning enabled) or "off" (reasoning disabled)
+	VSRSelectedReasoning = "x-vsr-selected-reasoning"
+
+	// VSRSelectedModel indicates the model selected by VSR for processing the request.
+	// Example values: "deepseek-v31", "phi4", "gpt-4"
+	VSRSelectedModel = "x-vsr-selected-model"
+
+	// VSRInjectedSystemPrompt indicates whether a system prompt was injected into the request.
+	// Values: "true" or "false"
+	VSRInjectedSystemPrompt = "x-vsr-injected-system-prompt"
+
+	// VSRCacheHit indicates that the response was served from cache.
+	// Value: "true"
+	VSRCacheHit = "x-vsr-cache-hit"
+)
+
+// Security Headers
+// These headers are added to responses when security policies are violated
+// or security checks detect potential threats.
+const (
+	// VSRPIIViolation indicates that the request was blocked due to PII policy violation.
+	// Value: "true"
+	VSRPIIViolation = "x-vsr-pii-violation"
+
+	// VSRJailbreakBlocked indicates that a jailbreak attempt was detected and blocked.
+	// Value: "true"
+	VSRJailbreakBlocked = "x-vsr-jailbreak-blocked"
+
+	// VSRJailbreakType specifies the type of jailbreak attempt that was detected.
+	// Example values depend on the jailbreak detection classifier.
+	VSRJailbreakType = "x-vsr-jailbreak-type"
+
+	// VSRJailbreakConfidence indicates the confidence level of the jailbreak detection.
+	// Value: floating point number formatted as string (e.g., "0.950")
+	VSRJailbreakConfidence = "x-vsr-jailbreak-confidence"
+)
@@ -0,0 +1,37 @@
+package headers
+
+import "testing"
+
+// TestHeaderConstants pins the literal wire value of every exported header
+// constant so that an accidental change to a header key fails the test suite.
+func TestHeaderConstants(t *testing.T) {
+	type headerCase struct {
+		label, got, want string
+	}
+	cases := []headerCase{
+		// Request headers
+		{label: "RequestID", got: RequestID, want: "x-request-id"},
+		{label: "GatewayDestinationEndpoint", got: GatewayDestinationEndpoint, want: "x-gateway-destination-endpoint"},
+		{label: "SelectedModel", got: SelectedModel, want: "x-selected-model"},
+		// VSR headers
+		{label: "VSRSelectedCategory", got: VSRSelectedCategory, want: "x-vsr-selected-category"},
+		{label: "VSRSelectedReasoning", got: VSRSelectedReasoning, want: "x-vsr-selected-reasoning"},
+		{label: "VSRSelectedModel", got: VSRSelectedModel, want: "x-vsr-selected-model"},
+		{label: "VSRInjectedSystemPrompt", got: VSRInjectedSystemPrompt, want: "x-vsr-injected-system-prompt"},
+		{label: "VSRCacheHit", got: VSRCacheHit, want: "x-vsr-cache-hit"},
+		// Security headers
+		{label: "VSRPIIViolation", got: VSRPIIViolation, want: "x-vsr-pii-violation"},
+		{label: "VSRJailbreakBlocked", got: VSRJailbreakBlocked, want: "x-vsr-jailbreak-blocked"},
+		{label: "VSRJailbreakType", got: VSRJailbreakType, want: "x-vsr-jailbreak-type"},
+		{label: "VSRJailbreakConfidence", got: VSRJailbreakConfidence, want: "x-vsr-jailbreak-confidence"},
+	}
+
+	for _, c := range cases {
+		t.Run(c.label, func(t *testing.T) {
+			if c.got == c.want {
+				return
+			}
+			t.Errorf("Expected %s to be %q, got %q", c.label, c.want, c.got)
+		})
+	}
+}
@@ -9,6 +9,7 @@ import (
 	ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
 	"github.com/openai/openai-go"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
 )
@@ -101,7 +102,7 @@ func CreatePIIViolationResponse(model string, deniedPII []string, isStreaming bo
 				},
 				{
 					Header: &core.HeaderValue{
-						Key:      "x-pii-violation",
+						Key:      headers.VSRPIIViolation,
 						RawValue: []byte("true"),
 					},
 				},
@@ -202,19 +203,19 @@ func CreateJailbreakViolationResponse(jailbreakType string, confidence float32,
 				},
 				{
 					Header: &core.HeaderValue{
-						Key:      "x-jailbreak-blocked",
+						Key:      headers.VSRJailbreakBlocked,
 						RawValue: []byte("true"),
 					},
 				},
 				{
 					Header: &core.HeaderValue{
-						Key:      "x-jailbreak-type",
+						Key:      headers.VSRJailbreakType,
 						RawValue: []byte(jailbreakType),
 					},
 				},
 				{
 					Header: &core.HeaderValue{
-						Key:      "x-jailbreak-confidence",
+						Key:      headers.VSRJailbreakConfidence,
 						RawValue: []byte(fmt.Sprintf("%.3f", confidence)),
 					},
 				},
@@ -246,7 +247,7 @@ func CreateCacheHitResponse(cachedResponse []byte) *ext_proc.ProcessingResponse
 				},
 				{
 					Header: &core.HeaderValue{
-						Key:      "x-vsr-cache-hit",
+						Key:      headers.VSRCacheHit,
 						RawValue: []byte("true"),
 					},
 				},