
Commit bbf8a6c

update

Signed-off-by: bitliu <[email protected]>
1 parent 1405550

2 files changed: 35 additions, 139 deletions

deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml

Lines changed: 35 additions & 2 deletions
```diff
@@ -38,6 +38,21 @@ spec:
       kind: EnvoyProxy
       name: semantic-router
 ---
+# By default, Envoy Gateway sets the buffer limit to 32kiB which is not sufficient for AI workloads.
+# This ClientTrafficPolicy sets the buffer limit to 50MiB as an example.
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: ClientTrafficPolicy
+metadata:
+  name: semantic-router
+  namespace: default
+spec:
+  targetRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: semantic-router
+  connection:
+    bufferLimit: 50Mi
+---
 apiVersion: gateway.envoyproxy.io/v1alpha1
 kind: ClientTrafficPolicy
 metadata:
```
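For readability, the buffer-limit policy this hunk introduces is reproduced below as a standalone manifest. It is a plain restatement of the added lines; nothing here is new except the trailing comment. bufferLimit takes Kubernetes resource.Quantity notation, so 50Mi means 50 MiB per client connection.

```yaml
# Standalone copy of the ClientTrafficPolicy added by this hunk.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: ClientTrafficPolicy
metadata:
  name: semantic-router
  namespace: default
spec:
  targetRefs:
  - group: gateway.networking.k8s.io
    kind: Gateway
    name: semantic-router
  connection:
    bufferLimit: 50Mi   # per-connection buffer; the 32 KiB default is too small for large LLM request bodies
```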
```diff
@@ -69,41 +84,59 @@ spec:
         value: math-expert
     backendRefs:
     - name: vllm-llama3-8b-instruct
+    timeouts:
+      request: 60s
+      backendRequest: 60s
   - matches:
     - headers:
       - type: Exact
         name: x-ai-eg-model
         value: science-expert
     backendRefs:
     - name: vllm-llama3-8b-instruct
+    timeouts:
+      request: 60s
+      backendRequest: 60s
   - matches:
     - headers:
       - type: Exact
         name: x-ai-eg-model
         value: social-expert
     backendRefs:
     - name: vllm-llama3-8b-instruct
+    timeouts:
+      request: 60s
+      backendRequest: 60s
   - matches:
     - headers:
       - type: Exact
         name: x-ai-eg-model
         value: humanities-expert
     backendRefs:
     - name: vllm-llama3-8b-instruct
+    timeouts:
+      request: 60s
+      backendRequest: 60s
   - matches:
     - headers:
       - type: Exact
         name: x-ai-eg-model
         value: law-expert
     backendRefs:
     - name: vllm-llama3-8b-instruct
+    timeouts:
+      request: 60s
+      backendRequest: 60s
   - matches:
     - headers:
       - type: Exact
         name: x-ai-eg-model
         value: general-expert
     backendRefs:
     - name: vllm-llama3-8b-instruct
+    timeouts:
+      request: 60s
+      backendRequest: 60s
 ---
 apiVersion: gateway.envoyproxy.io/v1alpha1
 kind: EnvoyPatchPolicy
```
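The flattened view above obscures where the new timeouts block lands. Below is a sketch of a single routing rule after this change, assuming the rule layout used in the reconstruction above (the route's kind and top-level fields sit outside this hunk): timeouts is a per-rule field next to matches and backendRefs, with request bounding the whole request and backendRequest bounding each individual call to the backend.

```yaml
# Sketch of one routing rule after this change (assumed indentation).
- matches:
  - headers:
    - type: Exact
      name: x-ai-eg-model
      value: math-expert
  backendRefs:
  - name: vllm-llama3-8b-instruct
  timeouts:
    request: 60s          # upper bound for the whole request through the gateway
    backendRequest: 60s   # upper bound for a single request to the backend
```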
```diff
@@ -126,7 +159,7 @@ spec:
               authority: semantic-router.vllm-semantic-router-system:50051
               clusterName: semantic-router
             timeout: 60s
-          message_timeout: 10s
+          message_timeout: 60s
           processing_mode:
             request_body_mode: BUFFERED
             request_header_mode: SEND
```
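For context, these fields belong to Envoy's ext_proc (ExternalProcessor) filter, which the patch points at the semantic-router gRPC service. The sketch below shows how the fields are assumed to nest; the filter name, the @type, and the grpc_service/envoy_grpc wrapper are inferred from standard Envoy xDS and are not visible in this hunk. The actual change is only message_timeout: 10s to 60s, giving the router up to a minute to answer each buffered message.

```yaml
# Sketch (assumed nesting) of the ext_proc filter config this patch targets.
name: envoy.filters.http.ext_proc
typed_config:
  '@type': type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
  grpc_service:
    envoy_grpc:
      cluster_name: semantic-router            # rendered as clusterName in the diff
      authority: semantic-router.vllm-semantic-router-system:50051
    timeout: 60s            # per-gRPC-call timeout
  message_timeout: 60s      # was 10s; time allowed per ext_proc message exchange
  processing_mode:
    request_header_mode: SEND
    request_body_mode: BUFFERED                # full request body is buffered before processing
```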
```diff
@@ -140,7 +173,7 @@ spec:
       op: add
       path: ''
       value:
-        connect_timeout: 10s
+        connect_timeout: 60s
         http2_protocol_options: {}
         lb_policy: ROUND_ROBIN
         load_assignment:
```
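The last hunk touches the cluster that the ext_proc filter dials; only connect_timeout changes (10s to 60s). Below is a rough sketch of the cluster value being added, assuming a conventional static endpoint. The cluster name and everything under load_assignment are assumptions, since they fall outside the visible lines.

```yaml
# Sketch of the cluster added by this patch; name and load_assignment details are assumptions.
name: semantic-router
connect_timeout: 60s          # was 10s; TCP connect timeout to the ext_proc service
http2_protocol_options: {}    # ext_proc speaks gRPC, so HTTP/2 is required
lb_policy: ROUND_ROBIN
load_assignment:
  cluster_name: semantic-router
  endpoints:
  - lb_endpoints:
    - endpoint:
        address:
          socket_address:
            address: semantic-router.vllm-semantic-router-system  # assumed service DNS name
            port_value: 50051                                     # assumed; matches the ext_proc authority above
```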

docs/default-model-fallback.md

Lines changed: 0 additions & 137 deletions
This file was deleted.
