| 
1 | 1 | # OpenShift-specific Envoy configuration  | 
2 |  | -# This config uses static clusters instead of ORIGINAL_DST to work with Kubernetes networking  | 
3 |  | -# The main difference from config/envoy.yaml is the use of static clusters for math_specialist_cluster  | 
4 |  | -# and coding_specialist_cluster that point to localhost ports 8000 and 8001 respectively.  | 
 | 2 | +# This config uses an ORIGINAL_DST cluster with header-based destination selection.  | 
 | 3 | +# The semantic router sets the x-gateway-destination-endpoint header, which Envoy uses  | 
 | 4 | +# to dynamically route each request to the correct vLLM endpoint (port 8000 or 8001).  | 
5 | 5 | static_resources:  | 
6 | 6 |   listeners:  | 
7 | 7 |   - name: listener_0  | 
@@ -48,29 +48,11 @@ static_resources:  | 
48 | 48 |                 route:  | 
49 | 49 |                   cluster: semantic_router_cluster  | 
50 | 50 |                   timeout: 300s  | 
51 |  | -              # Route to Model-A for Model-A requests  | 
 | 51 | +              # Dynamic route - destination determined by x-gateway-destination-endpoint header  | 
52 | 52 |               - match:  | 
53 | 53 |                   prefix: "/"  | 
54 |  | -                  headers:  | 
55 |  | -                  - name: "x-gateway-destination-endpoint"  | 
56 |  | -                    exact_match: "127.0.0.1:8000"  | 
57 | 54 |                 route:  | 
58 |  | -                  cluster: model_a_cluster  | 
59 |  | -                  timeout: 300s  | 
60 |  | -              # Route to Model-B for Model-B requests  | 
61 |  | -              - match:  | 
62 |  | -                  prefix: "/"  | 
63 |  | -                  headers:  | 
64 |  | -                  - name: "x-gateway-destination-endpoint"  | 
65 |  | -                    exact_match: "127.0.0.1:8001"  | 
66 |  | -                route:  | 
67 |  | -                  cluster: model_b_cluster  | 
68 |  | -                  timeout: 300s  | 
69 |  | -              # Default route to Model-B (fallback)  | 
70 |  | -              - match:  | 
71 |  | -                  prefix: "/"  | 
72 |  | -                route:  | 
73 |  | -                  cluster: model_b_cluster  | 
 | 55 | +                  cluster: vllm_dynamic_cluster  | 
74 | 56 |                   timeout: 300s  | 
75 | 57 |           http_filters:  | 
76 | 58 |           - name: envoy.filters.http.ext_proc  | 
@@ -147,42 +129,16 @@ static_resources:  | 
147 | 129 |         explicit_http_config:  | 
148 | 130 |           http_protocol_options: {}  | 
149 | 131 | 
 
  | 
150 |  | -  # Static cluster for Model-A (OpenShift-specific)  | 
151 |  | -  - name: model_a_cluster  | 
152 |  | -    connect_timeout: 300s  | 
153 |  | -    per_connection_buffer_limit_bytes: 52428800  | 
154 |  | -    type: STATIC  | 
155 |  | -    lb_policy: ROUND_ROBIN  | 
156 |  | -    load_assignment:  | 
157 |  | -      cluster_name: model_a_cluster  | 
158 |  | -      endpoints:  | 
159 |  | -      - lb_endpoints:  | 
160 |  | -        - endpoint:  | 
161 |  | -            address:  | 
162 |  | -              socket_address:  | 
163 |  | -                address: 127.0.0.1  | 
164 |  | -                port_value: 8000  | 
165 |  | -    typed_extension_protocol_options:  | 
166 |  | -      envoy.extensions.upstreams.http.v3.HttpProtocolOptions:  | 
167 |  | -        "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions  | 
168 |  | -        explicit_http_config:  | 
169 |  | -          http_protocol_options: {}  | 
170 |  | - | 
171 |  | -  # Static cluster for Model-B (OpenShift-specific)  | 
172 |  | -  - name: model_b_cluster  | 
 | 132 | +  # Dynamic vLLM cluster: an ORIGINAL_DST cluster whose destination is taken from a request header.  | 
 | 133 | +  # The semantic router sets the x-gateway-destination-endpoint header to the target address (host:port).  | 
 | 134 | +  - name: vllm_dynamic_cluster  | 
173 | 135 |     connect_timeout: 300s  | 
174 | 136 |     per_connection_buffer_limit_bytes: 52428800  | 
175 |  | -    type: STATIC  | 
176 |  | -    lb_policy: ROUND_ROBIN  | 
177 |  | -    load_assignment:  | 
178 |  | -      cluster_name: model_b_cluster  | 
179 |  | -      endpoints:  | 
180 |  | -      - lb_endpoints:  | 
181 |  | -        - endpoint:  | 
182 |  | -            address:  | 
183 |  | -              socket_address:  | 
184 |  | -                address: 127.0.0.1  | 
185 |  | -                port_value: 8001  | 
 | 137 | +    type: ORIGINAL_DST  | 
 | 138 | +    lb_policy: CLUSTER_PROVIDED  | 
 | 139 | +    original_dst_lb_config:  | 
 | 140 | +      use_http_header: true  | 
 | 141 | +      http_header_name: "x-gateway-destination-endpoint"  | 
186 | 142 |     typed_extension_protocol_options:  | 
187 | 143 |       envoy.extensions.upstreams.http.v3.HttpProtocolOptions:  | 
188 | 144 |         "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions  | 
 | 
0 commit comments