- **Total token limits**: Set overall usage quotas per user/tenant
- **Time-based windows**: Configure limits per second, minute, or hour (see the sketch below)
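
As a minimal sketch, token-based limits can be expressed by recording each request's token usage as dynamic metadata on the `AIGatewayRoute` and rate limiting on that cost with an Envoy Gateway `BackendTrafficPolicy`. The resource names, the `x-user-id` header, and the exact field layout here are assumptions; verify them against the API version you run.

```yaml
# Sketch: record token usage per request (assumed field names).
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
  name: ai-route                    # placeholder name
spec:
  # Expose each request's total token count as dynamic metadata.
  llmRequestCosts:
    - metadataKey: llm_total_token
      type: TotalToken
  # ... routing rules omitted for brevity
---
# Sketch: enforce a per-user hourly token budget via a global rate limit.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: BackendTrafficPolicy
metadata:
  name: token-budget                # placeholder name
spec:
  targetRefs:
    - group: gateway.networking.k8s.io
      kind: Gateway
      name: ai-gateway              # placeholder Gateway
  rateLimit:
    type: Global
    global:
      rules:
        - clientSelectors:
            - headers:
                - name: x-user-id   # example per-user key
                  type: Distinct
          limit:
            requests: 100000        # counted as tokens via the cost below
            unit: Hour
          cost:
            response:
              from: Metadata
              metadata:
                namespace: io.envoy.ai_gateway   # assumed metadata namespace
                key: llm_total_token
```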
### 3. **Model/Provider Failover**
Ensure high availability with automatic failover mechanisms (a configuration sketch follows this list):
- Detect unhealthy backends and route traffic to healthy instances
- Support for active-passive and active-active failover strategies
- Graceful degradation when primary models are unavailable
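
As an illustrative sketch only: one way to express active-passive failover is to list multiple backends on a route and prefer one over the other. The `priority` field below is an assumption about the API shape and may differ across versions; the backend names and model value are placeholders.

```yaml
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
  name: failover-route              # placeholder name
spec:
  # Gateway attachment omitted for brevity.
  rules:
    - matches:
        - headers:
            - type: Exact
              name: x-ai-eg-model   # model-based routing header
              value: gpt-4o-mini    # placeholder model
      backendRefs:
        - name: openai-primary      # placeholder backend
          priority: 0               # assumed field: preferred backend
        - name: azure-fallback      # placeholder backend
          priority: 1               # assumed field: used when primary is unhealthy
```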
### 4. **Traffic Splitting & Canary Testing**
Deploy new models safely with progressive rollout capabilities:
- **A/B Testing**: Split traffic between model versions to compare performance
- **Canary Deployments**: Gradually shift traffic to new models (e.g., 5% → 25% → 50% → 100%)
- **Shadow Traffic**: Send duplicate requests to new models without affecting production
- **Weight-based routing**: Fine-tune traffic distribution across model variants (see the sketch below)
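
A minimal canary sketch under the same assumptions as above: traffic for one model is split by `weight` across a stable backend and a canary backend. Names are placeholders; shift the weights (5 → 25 → 50 → 100) as confidence in the new model grows.

```yaml
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
  name: canary-route                # placeholder name
spec:
  rules:
    - matches:
        - headers:
            - type: Exact
              name: x-ai-eg-model
              value: my-model       # placeholder model
      backendRefs:
        - name: model-stable        # placeholder backend
          weight: 95                # stable version keeps 95% of traffic
        - name: model-canary        # placeholder backend
          weight: 5                 # canary starts at 5%
```

Shadow traffic differs from this split: it duplicates requests to the new model rather than dividing them, so canary responses never reach users.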
### 5. **LLM Observability & Monitoring**
Gain deep insights into your LLM infrastructure:
- **Request/Response Metrics**: Track latency, throughput, token usage, and error rates
- **Model Performance**: Monitor accuracy, quality scores, and user satisfaction
- **Cost Analytics**: Analyze spending patterns across models and providers
- **Distributed Tracing**: End-to-end visibility with OpenTelemetry integration
- **Custom Dashboards**: Visualize metrics in Prometheus, Grafana, or your preferred monitoring stack (a sample scrape configuration follows)
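
As one hedged example, a Prometheus scrape job targeting the gateway's Envoy pods might look like the following. The pod label selector is an assumption about your deployment; `/stats/prometheus` is Envoy's standard Prometheus stats endpoint, but check how and where metrics are exposed in your setup.

```yaml
# Hypothetical scrape job; verify labels and ports against your deployment.
scrape_configs:
  - job_name: envoy-ai-gateway
    metrics_path: /stats/prometheus   # Envoy's Prometheus stats endpoint
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      # Keep only the Envoy proxy pods (label selector is an assumption).
      - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
        action: keep
        regex: envoy
```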
## Supported LLM Providers
| Provider Name | API Schema Config on [AIServiceBackend](https://aigateway.envoyproxy.io/docs/api/#aiservicebackendspec) | Upstream Authentication Config on [BackendSecurityPolicy](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicyspec) | Status | Note |
|---|---|---|---|---|
|[Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference)|`{"name":"AzureOpenAI","version":"2025-01-01-preview"}` or `{"name":"OpenAI", "version": "openai/v1"}`|[Azure Credentials](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicyazurecredentials) or [Azure API Key](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicyazureapikey)| ✅ ||
|[Google Gemini on AI Studio](https://ai.google.dev/gemini-api/docs/openai)|`{"name":"OpenAI","version":"v1beta/openai"}`|[API Key](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicyapikey)| ✅ | Only the OpenAI-compatible endpoint |
|[Anthropic on GCP Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude)|`{"name":"GCPAnthropic", "version":"vertex-2023-10-16"}`|[GCP Credentials](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicygcpcredentials)| ✅ | Supports both the native Anthropic Messages endpoint and the OpenAI-compatible endpoint |
|[DeepInfra](https://deepinfra.com/docs/inference)|`{"name":"OpenAI","version":"v1/openai"}`|[API Key](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicyapikey)| ✅ | Only the OpenAI-compatible endpoint |
|[Anthropic](https://docs.claude.com/en/home)|`{"name":"Anthropic"}`|[Anthropic API Key](https://aigateway.envoyproxy.io/docs/api/#backendsecuritypolicyanthropicapikey)| ✅ | Supports only the native Anthropic Messages endpoint |
| Self-hosted models |`{"name":"OpenAI","version":"v1"}`| N/A | ✅ | The schema depends on the API spoken by the self-hosted server; for example, [vLLM](https://docs.vllm.ai/en/v0.8.3/serving/openai_compatible_server.html) speaks the OpenAI format. API key auth can also be configured. A configuration example follows the table. |
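
For concreteness, here is a hedged sketch pairing the two resources from the table for a self-hosted OpenAI-compatible server. The `schema` value comes straight from the table's last row; the resource names, backend reference, port, and Secret are placeholders, and the `BackendSecurityPolicy` field layout may vary across versions.

```yaml
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
  name: vllm-backend              # placeholder name
spec:
  schema:
    name: OpenAI                  # from the table: self-hosted, OpenAI-compatible
    version: v1
  backendRef:
    name: vllm-service            # placeholder Service reference
    kind: Service
    port: 8000                    # placeholder port
---
# Optional API key auth, as noted in the table (assumed field names).
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: BackendSecurityPolicy
metadata:
  name: vllm-api-key
spec:
  targetRefs:
    - group: aigateway.envoyproxy.io
      kind: AIServiceBackend
      name: vllm-backend
  type: APIKey
  apiKey:
    secretRef:
      name: vllm-api-key-secret   # placeholder Secret holding the key
```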
## Prerequisites
Before starting, ensure you have the following tools installed: