Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions src/semantic-router/pkg/extproc/request_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/http"
Expand Down Expand Up @@ -285,8 +286,8 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques
defer span.End()

// Store headers for later use
headers := v.RequestHeaders.Headers
for _, h := range headers.Headers {
requestHeaders := v.RequestHeaders.Headers
for _, h := range requestHeaders.Headers {
// Prefer Value when available; fall back to RawValue
headerValue := h.Value
if headerValue == "" && len(h.RawValue) > 0 {
Expand All @@ -296,7 +297,7 @@ func (r *OpenAIRouter) handleRequestHeaders(v *ext_proc.ProcessingRequest_Reques

ctx.Headers[h.Key] = headerValue
// Store request ID if present (case-insensitive)
if strings.ToLower(h.Key) == "x-request-id" {
if strings.ToLower(h.Key) == headers.RequestID {
ctx.RequestID = headerValue
}
}
Expand Down Expand Up @@ -800,15 +801,15 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
if selectedEndpoint != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-gateway-destination-endpoint",
Key: headers.GatewayDestinationEndpoint,
RawValue: []byte(selectedEndpoint),
},
})
}
if actualModel != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-selected-model",
Key: headers.SelectedModel,
RawValue: []byte(actualModel),
},
})
Expand Down Expand Up @@ -889,7 +890,7 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
if selectedEndpoint != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-gateway-destination-endpoint",
Key: headers.GatewayDestinationEndpoint,
RawValue: []byte(selectedEndpoint),
},
})
Expand Down Expand Up @@ -1042,9 +1043,9 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti
(*response).GetRequestBody().GetResponse().GetHeaderMutation().GetSetHeaders() != nil {
for _, header := range (*response).GetRequestBody().GetResponse().GetHeaderMutation().GetSetHeaders() {
switch header.Header.Key {
case "x-gateway-destination-endpoint":
case headers.GatewayDestinationEndpoint:
selectedEndpoint = header.Header.Value
case "x-selected-model":
case headers.SelectedModel:
actualModel = header.Header.Value
}
}
Expand All @@ -1054,15 +1055,15 @@ func (r *OpenAIRouter) updateRequestWithTools(openAIRequest *openai.ChatCompleti
if selectedEndpoint != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-gateway-destination-endpoint",
Key: headers.GatewayDestinationEndpoint,
RawValue: []byte(selectedEndpoint),
},
})
}
if actualModel != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-selected-model",
Key: headers.SelectedModel,
RawValue: []byte(actualModel),
},
})
Expand Down
9 changes: 5 additions & 4 deletions src/semantic-router/pkg/extproc/response_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"

"github.com/openai/openai-go"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
)
Expand Down Expand Up @@ -60,7 +61,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
if ctx.VSRSelectedCategory != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-vsr-selected-category",
Key: headers.VSRSelectedCategory,
RawValue: []byte(ctx.VSRSelectedCategory),
},
})
Expand All @@ -70,7 +71,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
if ctx.VSRReasoningMode != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-vsr-selected-reasoning",
Key: headers.VSRSelectedReasoning,
RawValue: []byte(ctx.VSRReasoningMode),
},
})
Expand All @@ -80,7 +81,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
if ctx.VSRSelectedModel != "" {
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-vsr-selected-model",
Key: headers.VSRSelectedModel,
RawValue: []byte(ctx.VSRSelectedModel),
},
})
Expand All @@ -93,7 +94,7 @@ func (r *OpenAIRouter) handleResponseHeaders(v *ext_proc.ProcessingRequest_Respo
}
setHeaders = append(setHeaders, &core.HeaderValueOption{
Header: &core.HeaderValue{
Key: "x-vsr-injected-system-prompt",
Key: headers.VSRInjectedSystemPrompt,
RawValue: []byte(injectedValue),
},
})
Expand Down
67 changes: 67 additions & 0 deletions src/semantic-router/pkg/headers/headers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Package headers provides constants for all custom HTTP headers used in the semantic router.
// All custom headers follow the "x-" prefix convention for non-standard HTTP headers.
package headers

// Request-side headers.
// RequestID is read from incoming requests, while GatewayDestinationEndpoint
// and SelectedModel are written by the router as request header mutations.
const (
	// RequestID names the header carrying the identifier used to track a
	// request through the system. Incoming header keys are lower-cased before
	// being compared against this value, so the match is case-insensitive.
	RequestID = "x-request-id"

	// GatewayDestinationEndpoint names the header carrying the backend endpoint
	// address chosen by the router. The router sets it so that Envoy can
	// direct the request to the appropriate upstream service.
	GatewayDestinationEndpoint = "x-gateway-destination-endpoint"

	// SelectedModel names the header carrying the model chosen by the router.
	// It is set during the routing decision phase.
	SelectedModel = "x-selected-model"
)

// VSR decision-tracking headers.
// The router attaches these to responses so that its routing decisions can be
// inspected for debugging and monitoring. The selection headers are emitted on
// successful (HTTP 200-299) responses that were not served from the cache;
// VSRCacheHit is the exception and marks responses that were.
const (
	// VSRSelectedCategory reports the category chosen during classification.
	// Example values: "math", "business", "biology", "computer_science"
	VSRSelectedCategory = "x-vsr-selected-category"

	// VSRSelectedReasoning reports the reasoning mode chosen for the request.
	// Values: "on" (reasoning enabled) or "off" (reasoning disabled)
	VSRSelectedReasoning = "x-vsr-selected-reasoning"

	// VSRSelectedModel reports the model chosen to process the request.
	// Example values: "deepseek-v31", "phi4", "gpt-4"
	VSRSelectedModel = "x-vsr-selected-model"

	// VSRInjectedSystemPrompt reports whether a system prompt was injected
	// into the request.
	// Values: "true" or "false"
	VSRInjectedSystemPrompt = "x-vsr-injected-system-prompt"

	// VSRCacheHit marks a response that was served from the cache.
	// Value: "true"
	VSRCacheHit = "x-vsr-cache-hit"
)

// Security headers.
// The router attaches these to the responses it generates when a security
// policy is violated or a security check detects a potential threat.
const (
	// VSRPIIViolation marks a request that was blocked because it violated
	// the PII policy.
	// Value: "true"
	VSRPIIViolation = "x-vsr-pii-violation"

	// VSRJailbreakBlocked marks a request in which a jailbreak attempt was
	// detected and blocked.
	// Value: "true"
	VSRJailbreakBlocked = "x-vsr-jailbreak-blocked"

	// VSRJailbreakType carries the type of jailbreak attempt that was
	// detected. The possible values depend on the jailbreak detection
	// classifier.
	VSRJailbreakType = "x-vsr-jailbreak-type"

	// VSRJailbreakConfidence carries the confidence of the jailbreak
	// detection.
	// Value: floating point number formatted with three decimals (e.g., "0.950")
	VSRJailbreakConfidence = "x-vsr-jailbreak-confidence"
)
37 changes: 37 additions & 0 deletions src/semantic-router/pkg/headers/headers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package headers

import (
"testing"
)

// TestHeaderConstants pins the wire value of every exported header constant so
// that an accidental rename of a header string is caught. Each constant is
// checked in its own subtest, named after the constant.
func TestHeaderConstants(t *testing.T) {
	type headerCase struct {
		label string // constant name, also used as the subtest name
		got   string // value of the constant under test
		want  string // literal header name expected on the wire
	}

	cases := []headerCase{
		// Request headers
		{label: "RequestID", got: RequestID, want: "x-request-id"},
		{label: "GatewayDestinationEndpoint", got: GatewayDestinationEndpoint, want: "x-gateway-destination-endpoint"},
		{label: "SelectedModel", got: SelectedModel, want: "x-selected-model"},
		// VSR headers
		{label: "VSRSelectedCategory", got: VSRSelectedCategory, want: "x-vsr-selected-category"},
		{label: "VSRSelectedReasoning", got: VSRSelectedReasoning, want: "x-vsr-selected-reasoning"},
		{label: "VSRSelectedModel", got: VSRSelectedModel, want: "x-vsr-selected-model"},
		{label: "VSRInjectedSystemPrompt", got: VSRInjectedSystemPrompt, want: "x-vsr-injected-system-prompt"},
		{label: "VSRCacheHit", got: VSRCacheHit, want: "x-vsr-cache-hit"},
		// Security headers
		{label: "VSRPIIViolation", got: VSRPIIViolation, want: "x-vsr-pii-violation"},
		{label: "VSRJailbreakBlocked", got: VSRJailbreakBlocked, want: "x-vsr-jailbreak-blocked"},
		{label: "VSRJailbreakType", got: VSRJailbreakType, want: "x-vsr-jailbreak-type"},
		{label: "VSRJailbreakConfidence", got: VSRJailbreakConfidence, want: "x-vsr-jailbreak-confidence"},
	}

	for _, tc := range cases {
		t.Run(tc.label, func(t *testing.T) {
			if tc.got == tc.want {
				return
			}
			t.Errorf("Expected %s to be %q, got %q", tc.label, tc.want, tc.got)
		})
	}
}
11 changes: 6 additions & 5 deletions src/semantic-router/pkg/utils/http/response.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
ext_proc "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
"github.com/openai/openai-go"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
)
Expand Down Expand Up @@ -101,7 +102,7 @@ func CreatePIIViolationResponse(model string, deniedPII []string, isStreaming bo
},
{
Header: &core.HeaderValue{
Key: "x-pii-violation",
Key: headers.VSRPIIViolation,
RawValue: []byte("true"),
},
},
Expand Down Expand Up @@ -202,19 +203,19 @@ func CreateJailbreakViolationResponse(jailbreakType string, confidence float32,
},
{
Header: &core.HeaderValue{
Key: "x-jailbreak-blocked",
Key: headers.VSRJailbreakBlocked,
RawValue: []byte("true"),
},
},
{
Header: &core.HeaderValue{
Key: "x-jailbreak-type",
Key: headers.VSRJailbreakType,
RawValue: []byte(jailbreakType),
},
},
{
Header: &core.HeaderValue{
Key: "x-jailbreak-confidence",
Key: headers.VSRJailbreakConfidence,
RawValue: []byte(fmt.Sprintf("%.3f", confidence)),
},
},
Expand Down Expand Up @@ -246,7 +247,7 @@ func CreateCacheHitResponse(cachedResponse []byte) *ext_proc.ProcessingResponse
},
{
Header: &core.HeaderValue{
Key: "x-vsr-cache-hit",
Key: headers.VSRCacheHit,
RawValue: []byte("true"),
},
},
Expand Down
Loading