         {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
         - --ha-enable-leader-election
         {{- end }}
+        {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+        - --enable-latency-predictor
+        {{- end }}
         # Pass additional flags via the inferenceExtension.flags field in values.yaml.
         {{- range $key, $value := .Values.inferenceExtension.flags }}
         - --{{ $key }}
@@ -108,6 +111,20 @@ spec:
           valueFrom:
             fieldRef:
               fieldPath: metadata.namespace
+        {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+        - name: PREDICTION_SERVER_URL
+          value: "{{- $count := int .Values.inferenceExtension.latencyPredictor.predictionServers.count -}}
+                 {{- $startPort := int .Values.inferenceExtension.latencyPredictor.predictionServers.startPort -}}
+                 {{- range $i := until $count -}}
+                 {{- if $i }},{{ end }}http://localhost:{{ add $startPort $i }}
+                 {{- end }}"
+        - name: TRAINING_SERVER_URL
+          value: "http://localhost:{{ .Values.inferenceExtension.latencyPredictor.trainingServer.port }}"
+        {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.eppEnv }}
+        - name: {{ $key }}
+          value: {{ $value | quote }}
+        {{- end }}
+        {{- end }}
         {{- if .Values.inferenceExtension.tracing.enabled }}
         - name: OTEL_SERVICE_NAME
           value: "gateway-api-inference-extension"
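
For reference, the `PREDICTION_SERVER_URL` block above renders one `http://localhost:<port>` entry per prediction sidecar, comma-separated, with ports counting up from `startPort` (the `{{ if $i }}` guard skips the comma before the first entry). A minimal sketch of the rendered env vars, assuming hypothetical values of `predictionServers.count: 3`, `predictionServers.startPort: 8001`, and `trainingServer.port: 8000` (illustrative, not chart defaults):

```yaml
# Hypothetical rendered output for count=3, startPort=8001, training port 8000
- name: PREDICTION_SERVER_URL
  value: "http://localhost:8001,http://localhost:8002,http://localhost:8003"
- name: TRAINING_SERVER_URL
  value: "http://localhost:8000"
```

Each prediction sidecar listens on its own port (`startPort + i`), so the EPP container can fan requests out across all of them over localhost.
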
@@ -138,10 +155,91 @@ spec:
         volumeMounts:
         - name: plugins-config-volume
           mountPath: "/config"
+      {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+      # Training Server Sidecar Container
+      - name: training-server
+        image: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.hub }}/{{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.name }}:{{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.tag }}
+        imagePullPolicy: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.image.pullPolicy }}
+        ports:
+        - containerPort: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.port }}
+          name: training-port
+        livenessProbe:
+          {{- toYaml .Values.inferenceExtension.latencyPredictor.trainingServer.livenessProbe | nindent 10 }}
+        readinessProbe:
+          {{- toYaml .Values.inferenceExtension.latencyPredictor.trainingServer.readinessProbe | nindent 10 }}
+        resources:
+          {{- toYaml .Values.inferenceExtension.latencyPredictor.trainingServer.resources | nindent 10 }}
+        envFrom:
+        - configMapRef:
+            name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-training
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: SERVER_TYPE
+          value: "training"
+        volumeMounts:
+        - name: training-server-storage
+          mountPath: /models
+      {{- range $i := until (int .Values.inferenceExtension.latencyPredictor.predictionServers.count) }}
+      # Prediction Server Sidecar Container {{ add $i 1 }}
+      - name: prediction-server-{{ add $i 1 }}
+        image: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.hub }}/{{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.name }}:{{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.tag }}
+        imagePullPolicy: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.image.pullPolicy }}
+        command: ["uvicorn"]
+        args: ["prediction_server:app", "--host", "0.0.0.0", "--port", "{{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}"]
+        ports:
+        - containerPort: {{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}
+          name: predict-port-{{ add $i 1 }}
+        livenessProbe:
+          httpGet:
+            path: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.livenessProbe.httpGet.path }}
+            port: {{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}
+          initialDelaySeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.livenessProbe.initialDelaySeconds }}
+          periodSeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.livenessProbe.periodSeconds }}
+        readinessProbe:
+          httpGet:
+            path: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.httpGet.path }}
+            port: {{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}
+          initialDelaySeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.initialDelaySeconds }}
+          periodSeconds: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.periodSeconds }}
+          failureThreshold: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.readinessProbe.failureThreshold }}
+        resources:
+          {{- toYaml $.Values.inferenceExtension.latencyPredictor.predictionServers.resources | nindent 10 }}
+        envFrom:
+        - configMapRef:
+            name: {{ include "gateway-api-inference-extension.name" $ }}-latency-predictor-prediction
+        env:
+        - name: PREDICT_PORT
+          value: "{{ add $.Values.inferenceExtension.latencyPredictor.predictionServers.startPort $i }}"
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: SERVER_TYPE
+          value: "prediction-{{ add $i 1 }}"
+        - name: TRAINING_SERVER_URL
+          value: "http://localhost:{{ $.Values.inferenceExtension.latencyPredictor.trainingServer.port }}"
+        volumeMounts:
+        - name: prediction-server-{{ add $i 1 }}-storage
+          mountPath: /server_models
+      {{- end }}
+      {{- end }}
       volumes:
       - name: plugins-config-volume
         configMap:
           name: {{ include "gateway-api-inference-extension.name" . }}
+      {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+      - name: training-server-storage
+        emptyDir:
+          sizeLimit: {{ .Values.inferenceExtension.latencyPredictor.trainingServer.volumeSize }}
+      {{- range $i := until (int .Values.inferenceExtension.latencyPredictor.predictionServers.count) }}
+      - name: prediction-server-{{ add $i 1 }}-storage
+        emptyDir:
+          sizeLimit: {{ $.Values.inferenceExtension.latencyPredictor.predictionServers.volumeSize }}
+      {{- end }}
+      {{- end }}
       {{- if .Values.inferenceExtension.affinity }}
       affinity:
         {{- toYaml .Values.inferenceExtension.affinity | nindent 8 }}
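
Taken together, the keys referenced above imply a `values.yaml` shape roughly like the sketch below. Field names are taken from the template itself; all concrete values (registry, image names, tags, ports, counts, sizes, probe paths) are placeholders for illustration, not the chart's defaults:

```yaml
inferenceExtension:
  latencyPredictor:
    enabled: true
    eppEnv:                              # extra env vars injected into the EPP container
      LATENCY_MAX_SAMPLE_SIZE: "10000"   # hypothetical example entry
    trainingServer:
      port: 8000                         # placeholder
      volumeSize: 10Gi                   # placeholder; backs the /models emptyDir
      image:
        hub: registry.example.com        # placeholder
        name: latency-training-server    # placeholder
        tag: latest
        pullPolicy: IfNotPresent
      livenessProbe:                     # passed through verbatim via toYaml,
        httpGet:                         # so it must be a complete probe spec
          path: /healthz                 # placeholder
          port: 8000
        initialDelaySeconds: 10
        periodSeconds: 10
      readinessProbe:
        httpGet:
          path: /readyz                  # placeholder
          port: 8000
        initialDelaySeconds: 10
        periodSeconds: 10
      resources: {}
    predictionServers:
      count: 3                           # number of prediction sidecars
      startPort: 8001                    # first sidecar port; others count up from here
      volumeSize: 5Gi                    # placeholder; backs each /server_models emptyDir
      image:
        hub: registry.example.com        # placeholder
        name: latency-prediction-server  # placeholder
        tag: latest
        pullPolicy: IfNotPresent
      livenessProbe:                     # only path/delay/period are read here;
        httpGet:                         # the template computes the port itself
          path: /healthz                 # placeholder
        initialDelaySeconds: 10
        periodSeconds: 10
      readinessProbe:
        httpGet:
          path: /readyz                  # placeholder
        initialDelaySeconds: 10
        periodSeconds: 10
        failureThreshold: 3
      resources: {}
```

Note the asymmetry: the training server's probes are rendered wholesale with `toYaml`, while the prediction servers' probes are assembled field by field so that each replica's probe port can track `startPort + i`.
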