diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
index 9a8be09f9..c768f6130 100644
--- a/config/charts/inferencepool/README.md
+++ b/config/charts/inferencepool/README.md
@@ -101,16 +101,16 @@ $ helm install triton-llama3-8b-instruct \
 
 ### Install with High Availability (HA)
 
-To deploy the EndpointPicker in a high-availability (HA) active-passive configuration, you can enable leader election. When enabled, the EPP deployment will have multiple replicas, but only one "leader" replica will be active and ready to process traffic at any given time. If the leader pod fails, another pod will be elected as the new leader, ensuring service continuity.
+To deploy the EndpointPicker in a high-availability (HA) active-passive configuration, set the replica count to a value greater than one. In such a setup, only one "leader" replica will be active and ready to process traffic at any given time. If the leader pod fails, another pod will be elected as the new leader, ensuring service continuity.
 
-To enable HA, set `inferenceExtension.enableLeaderElection` to `true`.
+To enable HA, set `inferenceExtension.replicas` to a number greater than 1.
 
 * Via `--set` flag:
 
   ```txt
   helm install vllm-llama3-8b-instruct \
     --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
-    --set inferenceExtension.enableLeaderElection=true \
+    --set inferenceExtension.replicas=3 \
     --set provider=[none|gke] \
     oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
   ```
@@ -119,7 +119,7 @@ To enable HA, set `inferenceExtension.enableLeaderElection` to `true`.
 
   ```yaml
   inferenceExtension:
-    enableLeaderElection: true
+    replicas: 3
   ```
 
 Then apply it with:
@@ -172,7 +172,7 @@ The following table list the configurable parameters of the chart.
 | `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
 | `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
 | `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
-| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. Defaults to `1`. |
+| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. If more than one replica is used, the EPP runs in HA active-passive mode. Defaults to `1`. |
 | `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. |
 | `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
 | `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
index d37ba3e94..6ed60b20d 100644
--- a/config/charts/inferencepool/templates/epp-deployment.yaml
+++ b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -6,11 +6,7 @@ metadata:
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 spec:
-  {{- if .Values.inferenceExtension.enableLeaderElection }}
-  replicas: 3
-  {{- else }}
-  replicas: 1
-  {{- end }}
+  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
   strategy:
   # The current recommended EPP deployment pattern is to have a single active replica. This ensures
   # optimal performance of the stateful operations such prefix cache aware scorer.
@@ -53,7 +49,7 @@ spec:
         - --lora-info-metric
         - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
         {{- end }}
-        {{- if .Values.inferenceExtension.enableLeaderElection }}
+        {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
        - --ha-enable-leader-election
        {{- end }}
        # Pass additional flags via the inferenceExtension.flags field in values.yaml.
@@ -72,7 +68,7 @@ spec:
           {{- toYaml .Values.inferenceExtension.extraContainerPorts | nindent 8 }}
         {{- end }}
         livenessProbe:
-          {{- if .Values.inferenceExtension.enableLeaderElection }}
+          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
           grpc:
             port: 9003
             service: liveness
@@ -84,7 +80,7 @@ spec:
           initialDelaySeconds: 5
           periodSeconds: 10
         readinessProbe:
-          {{- if .Values.inferenceExtension.enableLeaderElection }}
+          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
           grpc:
             port: 9003
             service: readiness
diff --git a/config/charts/inferencepool/templates/leader-election-rbac.yaml b/config/charts/inferencepool/templates/leader-election-rbac.yaml
index 923bdd6f4..8816dac7d 100644
--- a/config/charts/inferencepool/templates/leader-election-rbac.yaml
+++ b/config/charts/inferencepool/templates/leader-election-rbac.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.inferenceExtension.enableLeaderElection }}
+{{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
 ---
 kind: Role
 apiVersion: rbac.authorization.k8s.io/v1