@@ -96,6 +96,9 @@ spec:
         {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
         - --ha-enable-leader-election
         {{- end }}
+        {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+        - --enable-latency-predictor
+        {{- end }}
         # Pass additional flags via the inferenceExtension.flags field in values.yaml.
         {{- range $key, $value := .Values.inferenceExtension.flags }}
         - --{{ $key }}
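When `inferenceExtension.latencyPredictor.enabled` is set, the EPP container gains one extra flag next to the existing leader-election logic. A sketch of the rendered `args` for a deployment with `replicas: 2` and the predictor enabled (values are illustrative, not chart defaults):

```yaml
args:
- --ha-enable-leader-election
- --enable-latency-predictor
```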
@@ -147,6 +150,20 @@ spec:
           valueFrom:
             fieldRef:
               fieldPath: metadata.namespace
+        {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+        - name: PREDICTION_SERVER_URL
+          value: "{{- $count := int .Values.inferenceExtension.latencyPredictor.predictionServers.count -}}
+                 {{- $startPort := int .Values.inferenceExtension.latencyPredictor.predictionServers.startPort -}}
+                 {{- range $i := until $count -}}
+                 {{- if $i }},{{ end }}http://localhost:{{ add $startPort $i }}
+                 {{- end }}"
+        - name: TRAINING_SERVER_URL
+          value: "http://localhost:{{ .Values.inferenceExtension.latencyPredictor.trainingServer.port }}"
+        {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.eppEnv }}
+        - name: {{ $key }}
+          value: {{ $value | quote }}
+        {{- end }}
+        {{- end }}
         {{- if .Values.inferenceExtension.tracing.enabled }}
         - name: OTEL_SERVICE_NAME
           value: "gateway-api-inference-extension"
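`PREDICTION_SERVER_URL` is assembled inline: the loop over `until $count` emits one `http://localhost:<startPort + i>` URL per prediction sidecar, `{{- if $i }},{{ end }}` prepends a comma before every element except the first (`$i` is 0 and therefore falsy on the first pass), and the `{{- ... -}}` trim markers collapse the template's line breaks into a single-line value. Assuming illustrative values of `count: 3`, `startPort: 8001`, and `trainingServer.port: 8000`, the rendered env entries would look like:

```yaml
- name: PREDICTION_SERVER_URL
  value: "http://localhost:8001,http://localhost:8002,http://localhost:8003"
- name: TRAINING_SERVER_URL
  value: "http://localhost:8000"
```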
@@ -177,13 +194,94 @@ spec:
         volumeMounts:
         - name: plugins-config-volume
           mountPath: "/config"
+      {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+      # Training Server Sidecar Container
+      - name: training-server
+        image: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.hub }}/{{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.name }}:{{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.tag }}
+        imagePullPolicy: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.pullPolicy }}
+        ports:
+        - containerPort: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.port }}
+          name: training-port
+        livenessProbe:
+          {{- toYaml .Values.inferenceExtension.latencyPredictor.trainingServer.livenessProbe | nindent 10 }}
+        readinessProbe:
+          {{- toYaml .Values.inferenceExtension.latencyPredictor.trainingServer.readinessProbe | nindent 10 }}
+        resources:
+          {{- toYaml .Values.inferenceExtension.latencyPredictor.trainingServer.resources | nindent 10 }}
+        envFrom:
+        - configMapRef:
+            name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-training
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: SERVER_TYPE
+          value: "training"
+        volumeMounts:
+        - name: training-server-storage
+          mountPath: /models
+      {{- range $i := until (int .Values.inferenceExtension.latencyPredictor.predictionServers.count) }}
+      # Prediction Server Sidecar Container {{ add $i 1 }}
+      - name: prediction-server-{{ add $i 1 }}
+        image: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.hub }}/{{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.name }}:{{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.tag }}
+        imagePullPolicy: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.pullPolicy }}
+        command: ["uvicorn"]
+        args: ["prediction_server:app", "--host", "0.0.0.0", "--port", "{{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}"]
+        ports:
+        - containerPort: {{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}
+          name: predict-port-{{ add $i 1 }}
+        livenessProbe:
+          httpGet:
+            path: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.livenessProbe.httpGet.path }}
+            port: {{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}
+          initialDelaySeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.livenessProbe.initialDelaySeconds }}
+          periodSeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.livenessProbe.periodSeconds }}
+        readinessProbe:
+          httpGet:
+            path: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.httpGet.path }}
+            port: {{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}
+          initialDelaySeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.initialDelaySeconds }}
+          periodSeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.periodSeconds }}
+          failureThreshold: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.failureThreshold }}
+        resources:
+          {{- toYaml $.Values.inferenceExtension.latencyPredictor.predictionServers.resources | nindent 10 }}
+        envFrom:
+        - configMapRef:
+            name: {{ include "gateway-api-inference-extension.name" $ }}-latency-predictor-prediction
+        env:
+        - name: PREDICT_PORT
+          value: "{{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}"
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: SERVER_TYPE
+          value: "prediction-{{ add $i 1 }}"
+        - name: TRAINING_SERVER_URL
+          value: "http://localhost:{{ $.Values.inferenceExtension.latencyPredictor.trainingServer.port }}"
+        volumeMounts:
+        - name: prediction-server-{{ add $i 1 }}-storage
+          mountPath: /server_models
+      {{- end }}
+      {{- end }}
       volumes:
       {{- if .Values.inferenceExtension.sidecar.volumes }}
       {{- tpl (toYaml .Values.inferenceExtension.sidecar.volumes) $ | nindent 6 }}
       {{- end }}
       - name: plugins-config-volume
         configMap:
           name: {{ include "gateway-api-inference-extension.name" . }}
+      {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+      - name: training-server-storage
+        emptyDir:
+          sizeLimit: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.volumeSize }}
+      {{- range $i := until (int .Values.inferenceExtension.latencyPredictor.predictionServers.count) }}
+      - name: prediction-server-{{ add $i 1 }}-storage
+        emptyDir:
+          sizeLimit: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.volumeSize }}
+      {{- end }}
+      {{- end }}
       {{- if .Values.inferenceExtension.affinity }}
       affinity:
         {{- toYaml .Values.inferenceExtension.affinity | nindent 8 }}
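For reference, a minimal sketch of the `values.yaml` shape this template consumes. The key structure mirrors what the template dereferences above; the concrete images, ports, sizes, and probe paths are illustrative assumptions, not chart defaults:

```yaml
inferenceExtension:
  latencyPredictor:
    enabled: true
    trainingServer:
      image:
        hub: registry.example.com        # assumption: placeholder registry
        name: latency-training-server    # assumption: placeholder image name
        tag: latest
        pullPolicy: IfNotPresent
      port: 8000
      volumeSize: 1Gi
      resources: {}
      livenessProbe:                     # full probe objects, inlined via toYaml
        httpGet:
          path: /healthz                 # assumption: placeholder path
          port: 8000
      readinessProbe:
        httpGet:
          path: /readyz                  # assumption: placeholder path
          port: 8000
    predictionServers:
      count: 3
      startPort: 8001                    # sidecars listen on 8001, 8002, 8003
      image:
        hub: registry.example.com
        name: latency-prediction-server
        tag: latest
        pullPolicy: IfNotPresent
      volumeSize: 1Gi
      resources: {}
      livenessProbe:                     # only these subkeys are read; the port is derived per replica
        httpGet:
          path: /healthz
        initialDelaySeconds: 10
        periodSeconds: 10
      readinessProbe:
        httpGet:
          path: /readyz
        initialDelaySeconds: 10
        periodSeconds: 10
        failureThreshold: 3
    eppEnv:                              # extra env vars passed through to the EPP container
      LATENCY_QUANTILE: "0.9"            # assumption: illustrative key/value
```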