
Commit c561234

- added epp-deployment container ports configurability (with service exposure if needed) (#1211)
- made epp-deployment env configuration flexible
- made epp logging verbosity configurable

Signed-off-by: Maroon Ayoub <[email protected]>

1 parent 9d810b2

File tree: 4 files changed, +48 −17 lines

config/charts/inferencepool/README.md

Lines changed: 33 additions & 12 deletions

````diff
@@ -24,26 +24,44 @@ Note that the provider name is needed to deploy provider-specific resources. If
 
 ### Install with Custom Environment Variables
 
-To set custom environment variables for the EndpointPicker deployment:
+To set custom environment variables for the EndpointPicker deployment, you can define them as free-form YAML in the `values.yaml` file:
+
+```yaml
+inferenceExtension:
+  env:
+    - name: FEATURE_FLAG_ENABLED
+      value: "true"
+    - name: CUSTOM_ENV_VAR
+      value: "custom_value"
+    - name: POD_IP
+      valueFrom:
+        fieldRef:
+          fieldPath: status.podIP
+```
+
+Then apply it with:
 
 ```txt
-$ helm install vllm-llama3-8b-instruct \
-  --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
-  --set provider.name=[none|gke] \
-  --set inferenceExtension.env.FEATURE_FLAG_ENABLED=true \
-  oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
+$ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
 ```
 
-Alternatively, you can define environment variables in a values file:
+### Install with Additional Ports
+
+To expose additional ports (e.g., for ZMQ), you can define them in the `values.yaml` file:
 
 ```yaml
-# values.yaml
 inferenceExtension:
-  env:
-    FEATURE_FLAG_ENABLED: "true"
+  extraContainerPorts:
+    - name: zmq
+      containerPort: 5557
+      protocol: TCP
+  extraServicePorts: # if need to expose the port for external communication
+    - name: zmq
+      port: 5557
+      protocol: TCP
 ```
 
-And apply it with:
+Then apply it with:
 
 ```txt
 $ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
 ```
@@ -84,7 +102,10 @@ The following table list the configurable parameters of the chart.
 | `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
 | `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
 | `inferenceExtension.extProcPort` | Port where the endpoint picker service is served for external processing. Defaults to `9002`. |
-| `inferenceExtension.env` | Map of environment variables to set in the endpoint picker container. Defaults to `{}`. |
+| `inferenceExtension.env` | List of environment variables to set in the endpoint picker container as free-form YAML. Defaults to `[]`. |
+| `inferenceExtension.extraContainerPorts` | List of additional container ports to expose. Defaults to `[]`. |
+| `inferenceExtension.extraServicePorts` | List of additional service ports to expose. Defaults to `[]`. |
+| `inferenceExtension.logVerbosity` | Logging verbosity level for the endpoint picker. Defaults to `"3"`. |
 | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. |
 
 ## Notes
````

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 6 additions & 4 deletions

```diff
@@ -28,7 +28,7 @@ spec:
         - --pool-namespace
         - {{ .Release.Namespace }}
         - --v
-        - "3"
+        - "{{ .Values.inferenceExtension.logVerbosity | default "3" }}"
         - --grpc-port
         - "9002"
         - --grpc-health-port
@@ -54,6 +54,9 @@ spec:
           containerPort: 9003
         - name: metrics
           containerPort: 9090
+        {{- with .Values.inferenceExtension.extraContainerPorts }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
         livenessProbe:
           grpc:
             port: 9003
@@ -66,10 +69,9 @@ spec:
             service: inference-extension
           initialDelaySeconds: 5
           periodSeconds: 10
+        {{- with .Values.inferenceExtension.env }}
         env:
-        {{- range $key, $value := .Values.inferenceExtension.env }}
-        - name: {{ $key }}
-          value: {{ $value | quote }}
+        {{- toYaml . | nindent 8 }}
         {{- end }}
         volumeMounts:
         - name: plugins-config-volume
```
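To make the new `with`/`toYaml` blocks concrete, here is a sketch of roughly what the container spec could render to when `extraContainerPorts` and `env` are set as in the README example. This is an illustrative approximation under assumed default chart values, not captured `helm template` output:

```yaml
# Approximate rendered container fragment (hypothetical):
ports:
- name: grpc
  containerPort: 9002
- name: grpc-health
  containerPort: 9003
- name: metrics
  containerPort: 9090
- name: zmq              # appended verbatim by the extraContainerPorts block
  containerPort: 5557
  protocol: TCP
env:                     # the surrounding `with` omits env: entirely when the value is empty
- name: FEATURE_FLAG_ENABLED
  value: "true"
```

Note the design change: because `env` is now passed through `toYaml` as a free-form list, values can use the full Kubernetes `EnvVar` schema (e.g. `valueFrom.fieldRef`), which the old `name`/`value` map could not express.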

config/charts/inferencepool/templates/epp-service.yaml

Lines changed: 3 additions & 0 deletions

```diff
@@ -15,4 +15,7 @@ spec:
     - name: http-metrics
       protocol: TCP
       port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+    {{- with .Values.inferenceExtension.extraServicePorts }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
   type: ClusterIP
```

config/charts/inferencepool/values.yaml

Lines changed: 6 additions & 1 deletion

```diff
@@ -6,7 +6,7 @@ inferenceExtension:
     tag: main
     pullPolicy: Always
   extProcPort: 9002
-  env: {}
+  env: []
   enablePprof: true # Enable pprof handlers for profiling and debugging
   # This is the plugins configuration file.
   pluginsConfigFile: "default-plugins.yaml"
@@ -32,6 +32,11 @@ inferenceExtension:
   #   env:
   #     KV_CACHE_SCORE_WEIGHT: "1"
 
+  # Define additional container ports
+  extraContainerPorts: []
+  # Define additional service ports
+  extraServicePorts: []
+
 inferencePool:
   targetPortNumber: 8000
   modelServerType: vllm # vllm, triton-tensorrt-llm
```
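Putting the new values together, a minimal override file exercising all four new knobs might look like the following. This is a sketch: the file name `my-values.yaml`, the `zmq` port, and the env entry are placeholders taken from the README example, and `logVerbosity: "4"` is an arbitrary illustrative level:

```yaml
# my-values.yaml (hypothetical override file)
inferenceExtension:
  logVerbosity: "4"             # fed into the EPP --v flag; chart default is "3"
  env:                          # free-form list of Kubernetes EnvVar entries
    - name: FEATURE_FLAG_ENABLED
      value: "true"
  extraContainerPorts:          # extra ports on the EPP container
    - name: zmq
      containerPort: 5557
      protocol: TCP
  extraServicePorts:            # only needed if the port must be reachable via the Service
    - name: zmq
      port: 5557
      protocol: TCP
```

Applied as in the README: `helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f my-values.yaml`.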
