Skip to content

Commit 137a0b4

Browse files
authored
Ensure EPP flags are configurable via Helm chart (#1302)
* Ensure EPP flags are configurable via Helm chart * Cleanup and remove customvalues * Address review comments * Rename eppFlags to flags * Fix spacing
1 parent d1fe78f commit 137a0b4

File tree

3 files changed

+58
-34
lines changed

3 files changed

+58
-34
lines changed

config/charts/inferencepool/README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,10 @@ The following table lists the configurable parameters of the chart.
119119
| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
120120
| `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
121121
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
122-
| `inferenceExtension.extProcPort` | Port where the endpoint picker service is served for external processing. Defaults to `9002`. |
123122
| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. |
124-
| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
125-
| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
126-
| `inferenceExtension.logVerbosity` | Logging verbosity level for the endpoint picker. Defaults to `"3"`. |
123+
| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
124+
| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
125+
| `inferenceExtension.flags` | List of `name`/`value` pairs passed through to the endpoint picker; each entry is rendered as a `--<name>=<value>` command-line flag. |
127126
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. |
128127
| `inferenceExtension.enableLeaderElection` | Enable leader election for high availability. When enabled, only one EPP pod (the leader) will be ready to serve traffic. It is recommended to set `inferenceExtension.replicas` to a value greater than 1 when this is set to `true`. Defaults to `false`. |
129128

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,25 +27,10 @@ spec:
2727
- {{ .Release.Name }}
2828
- --pool-namespace
2929
- {{ .Release.Namespace }}
30-
- --v
31-
- "{{ .Values.inferenceExtension.logVerbosity | default "3" }}"
32-
- --grpc-port
33-
- "9002"
34-
- --grpc-health-port
35-
- "9003"
3630
- --zap-encoder
3731
- "json"
38-
- --metrics-port
39-
- "9090"
40-
- --config-file
41-
- "config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
42-
# https://pkg.go.dev/flag#hdr-Command_line_flag_syntax; space is only for non-bool flags
43-
- "--enable-pprof={{ .Values.inferenceExtension.enablePprof }}"
44-
- "--model-server-metrics-path={{ .Values.inferenceExtension.modelServerMetricsPath }}"
45-
- "--model-server-metrics-scheme={{ .Values.inferenceExtension.modelServerMetricsScheme }}"
46-
- "--model-server-metrics-https-insecure-skip-verify={{ .Values.inferenceExtension.modelServerMetricsHttpsInsecureSkipVerify }}"
47-
{{- if .Values.inferenceExtension.enableLeaderElection }}
48-
- "--ha-enable-leader-election"
32+
{{- range .Values.inferenceExtension.flags }}
33+
- "--{{ .name }}={{ .value }}"
4934
{{- end }}
5035
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
5136
- --total-queued-requests-metric

config/charts/inferencepool/values.yaml

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ inferenceExtension:
77
pullPolicy: Always
88
extProcPort: 9002
99
env: []
10-
enablePprof: true # Enable pprof handlers for profiling and debugging
11-
modelServerMetricsPath: "/metrics"
12-
modelServerMetricsScheme: "http"
13-
modelServerMetricsHttpsInsecureSkipVerify: true
14-
# This is the plugins configuration file.
1510
pluginsConfigFile: "default-plugins.yaml"
11+
# Define additional container ports
12+
extraContainerPorts: []
13+
# Define additional service ports
14+
extraServicePorts: []
15+
16+
# Example of supplying custom plugins configuration files inline:
1617
# pluginsCustomConfig:
1718
# custom-plugins.yaml: |
1819
# apiVersion: inference.networking.x-k8s.io/v1alpha1
@@ -30,19 +31,58 @@ inferenceExtension:
3031
# env:
3132
# ENABLE_EXPERIMENTAL_FEATURE: "true"
3233

33-
# Define additional container ports
34-
extraContainerPorts: []
35-
# Define additional service ports
36-
extraServicePorts: []
37-
# Enable leader election for high availability. When enabled, it is recommended to set replicas > 1.
38-
# Only the leader pod will be ready to serve traffic.
39-
enableLeaderElection: false
34+
flags:
35+
- name: grpc-port
36+
value: 9002
37+
- name: grpc-health-port
38+
value: 9003
39+
- name: metrics-port
40+
value: 9090
41+
- name: enable-pprof
42+
value: "true" # Enable pprof handlers for profiling and debugging
43+
- name: pool-group
44+
value: "inference.networking.k8s.io"
45+
# Log verbosity
46+
- name: v
47+
value: 1
48+
- name: secure-serving
49+
value: "true"
50+
- name: health-checking
51+
value: "false"
52+
- name: cert-path
53+
value: ""
54+
- name: total-queued-requests-metric
55+
value: "vllm:num_requests_waiting"
56+
- name: kv-cache-usage-percentage-metric
57+
value: "vllm:gpu_cache_usage_perc"
58+
- name: lora-info-metric
59+
value: "vllm:lora_requests_info"
60+
- name: refresh-metrics-interval
61+
value: "50ms"
62+
- name: refresh-prometheus-metrics-interval
63+
value: "5s"
64+
- name: metrics-staleness-threshold
65+
value: "2s"
66+
- name: config-file
67+
value: ""
68+
- name: config-text
69+
value: ""
70+
- name: model-server-metrics-port
71+
value: 0
72+
- name: model-server-metrics-path
73+
value: "/metrics"
74+
- name: model-server-metrics-scheme
75+
value: "http"
76+
- name: model-server-metrics-https-insecure-skip-verify
77+
value: "true"
78+
- name: ha-enable-leader-election
79+
value: false
4080

4181
inferencePool:
4282
targetPorts:
4383
- number: 8000
4484
modelServerType: vllm # vllm, triton-tensorrt-llm
45-
modelServers: # REQUIRED
85+
modelServers:
4686
matchLabels:
4787
app: vllm-llama3-8b-instruct
4888

0 commit comments

Comments
 (0)