Skip to content

Commit 7d55d4a

Browse files
committed
fix(envoy): use ORIGINAL_DST cluster for dynamic routing
Replace static cluster routing with the ORIGINAL_DST cluster type to fix a routing bug where all requests were going to model_b_cluster.

**Problem:**
- Envoy evaluates routes BEFORE the ExtProc filter runs
- Header-based routing never matched because the header wasn't set yet
- All requests fell through to the default route (model_b_cluster)
- Router selected Model-A but Envoy routed to Model-B

**Solution:**
- Use ORIGINAL_DST cluster with use_http_header: true
- Cluster reads the x-gateway-destination-endpoint header set by ExtProc
- Routes to the correct endpoint (127.0.0.1:8000 or 8001) dynamically

**Testing:**
Verified with Envoy logs showing:
- Before fix: selected_model: Model-A, upstream_host: 127.0.0.1:8001 (WRONG)
- After fix: destination determined by header value

This aligns the OpenShift config with the local config/envoy.yaml approach.

Signed-off-by: Yossi Ovadia <[email protected]>
1 parent c6b2b60 commit 7d55d4a

File tree

1 file changed

+13
-57
lines changed

1 file changed

+13
-57
lines changed

deploy/openshift/envoy-openshift.yaml

Lines changed: 13 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# OpenShift-specific Envoy configuration
2-
# This config uses static clusters instead of ORIGINAL_DST to work with Kubernetes networking
3-
# The main difference from config/envoy.yaml is the use of static clusters for math_specialist_cluster
4-
# and coding_specialist_cluster that point to localhost ports 8000 and 8001 respectively.
2+
# This config uses ORIGINAL_DST cluster with header-based destination selection
3+
# The semantic router sets the x-gateway-destination-endpoint header which Envoy uses
4+
# to dynamically route to the correct vLLM endpoint (port 8000 or 8001)
55
static_resources:
66
listeners:
77
- name: listener_0
@@ -48,29 +48,11 @@ static_resources:
4848
route:
4949
cluster: semantic_router_cluster
5050
timeout: 300s
51-
# Route to Model-A for Model-A requests
51+
# Dynamic route - destination determined by x-gateway-destination-endpoint header
5252
- match:
5353
prefix: "/"
54-
headers:
55-
- name: "x-gateway-destination-endpoint"
56-
exact_match: "127.0.0.1:8000"
5754
route:
58-
cluster: model_a_cluster
59-
timeout: 300s
60-
# Route to Model-B for Model-B requests
61-
- match:
62-
prefix: "/"
63-
headers:
64-
- name: "x-gateway-destination-endpoint"
65-
exact_match: "127.0.0.1:8001"
66-
route:
67-
cluster: model_b_cluster
68-
timeout: 300s
69-
# Default route to Model-B (fallback)
70-
- match:
71-
prefix: "/"
72-
route:
73-
cluster: model_b_cluster
55+
cluster: vllm_dynamic_cluster
7456
timeout: 300s
7557
http_filters:
7658
- name: envoy.filters.http.ext_proc
@@ -147,42 +129,16 @@ static_resources:
147129
explicit_http_config:
148130
http_protocol_options: {}
149131

150-
# Static cluster for Model-A (OpenShift-specific)
151-
- name: model_a_cluster
152-
connect_timeout: 300s
153-
per_connection_buffer_limit_bytes: 52428800
154-
type: STATIC
155-
lb_policy: ROUND_ROBIN
156-
load_assignment:
157-
cluster_name: model_a_cluster
158-
endpoints:
159-
- lb_endpoints:
160-
- endpoint:
161-
address:
162-
socket_address:
163-
address: 127.0.0.1
164-
port_value: 8000
165-
typed_extension_protocol_options:
166-
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
167-
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
168-
explicit_http_config:
169-
http_protocol_options: {}
170-
171-
# Static cluster for Model-B (OpenShift-specific)
172-
- name: model_b_cluster
132+
# Dynamic vLLM cluster using ORIGINAL_DST with header-based destination
133+
# The semantic router sets the x-gateway-destination-endpoint header to the target address in ip:port form (e.g. 127.0.0.1:8000)
134+
- name: vllm_dynamic_cluster
173135
connect_timeout: 300s
174136
per_connection_buffer_limit_bytes: 52428800
175-
type: STATIC
176-
lb_policy: ROUND_ROBIN
177-
load_assignment:
178-
cluster_name: model_b_cluster
179-
endpoints:
180-
- lb_endpoints:
181-
- endpoint:
182-
address:
183-
socket_address:
184-
address: 127.0.0.1
185-
port_value: 8001
137+
type: ORIGINAL_DST
138+
lb_policy: CLUSTER_PROVIDED
139+
original_dst_lb_config:
140+
use_http_header: true
141+
http_header_name: "x-gateway-destination-endpoint"
186142
typed_extension_protocol_options:
187143
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
188144
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions

0 commit comments

Comments
 (0)