File tree: 2 files changed, +35 -139 lines changed
deploy/kubernetes/ai-gateway/aigw-resources (2 files changed, +35 -139 lines changed)
Columns: original file line number | diff line number | diff line change
3838 kind : EnvoyProxy
3939 name : semantic-router
4040---
41+ # By default, Envoy Gateway sets the buffer limit to 32KiB, which is not sufficient for AI workloads.
42+ # This ClientTrafficPolicy sets the buffer limit to 50MiB as an example.
43+ apiVersion : gateway.envoyproxy.io/v1alpha1
44+ kind : ClientTrafficPolicy
45+ metadata :
46+ name : semantic-router
47+ namespace : default
48+ spec :
49+ targetRefs :
50+ - group : gateway.networking.k8s.io
51+ kind : Gateway
52+ name : semantic-router
53+ connection :
54+ bufferLimit : 50Mi
55+ ---
4156apiVersion : gateway.envoyproxy.io/v1alpha1
4257kind : ClientTrafficPolicy
4358metadata :
@@ -69,41 +84,59 @@ spec:
6984 value : math-expert
7085 backendRefs :
7186 - name : vllm-llama3-8b-instruct
87+ timeouts :
88+ request : 60s
89+ backendRequest : 60s
7290 - matches :
7391 - headers :
7492 - type : Exact
7593 name : x-ai-eg-model
7694 value : science-expert
7795 backendRefs :
7896 - name : vllm-llama3-8b-instruct
97+ timeouts :
98+ request : 60s
99+ backendRequest : 60s
79100 - matches :
80101 - headers :
81102 - type : Exact
82103 name : x-ai-eg-model
83104 value : social-expert
84105 backendRefs :
85106 - name : vllm-llama3-8b-instruct
107+ timeouts :
108+ request : 60s
109+ backendRequest : 60s
86110 - matches :
87111 - headers :
88112 - type : Exact
89113 name : x-ai-eg-model
90114 value : humanities-expert
91115 backendRefs :
92116 - name : vllm-llama3-8b-instruct
117+ timeouts :
118+ request : 60s
119+ backendRequest : 60s
93120 - matches :
94121 - headers :
95122 - type : Exact
96123 name : x-ai-eg-model
97124 value : law-expert
98125 backendRefs :
99126 - name : vllm-llama3-8b-instruct
127+ timeouts :
128+ request : 60s
129+ backendRequest : 60s
100130 - matches :
101131 - headers :
102132 - type : Exact
103133 name : x-ai-eg-model
104134 value : general-expert
105135 backendRefs :
106136 - name : vllm-llama3-8b-instruct
137+ timeouts :
138+ request : 60s
139+ backendRequest : 60s
107140---
108141apiVersion : gateway.envoyproxy.io/v1alpha1
109142kind : EnvoyPatchPolicy
@@ -126,7 +159,7 @@ spec:
126159 authority : semantic-router.vllm-semantic-router-system:50051
127160 clusterName : semantic-router
128161 timeout : 60s
129- message_timeout : 10s
162+ message_timeout : 60s
130163 processing_mode :
131164 request_body_mode : BUFFERED
132165 request_header_mode : SEND
@@ -140,7 +173,7 @@ spec:
140173 op : add
141174 path : ' '
142175 value :
143- connect_timeout : 10s
176+ connect_timeout : 60s
144177 http2_protocol_options : {}
145178 lb_policy : ROUND_ROBIN
146179 load_assignment :
Load Diff This file was deleted.
You can’t perform that action at this time.
0 commit comments