diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
deleted file mode 100644
index ffe19654b..000000000
--- a/config/manifests/inferencepool-resources.yaml
+++ /dev/null
@@ -1,186 +0,0 @@
-# Note: If you change this file, please also change:
-#  - ./test/testdata/inferencepool-e2e.yaml
-#  - ./conformance/resources/manifests/manifests.yaml
-#  - ./site-src/guides/inferencepool-rollout.md
----
-apiVersion: inference.networking.k8s.io/v1
-kind: InferencePool
-metadata:
-  name: vllm-llama3-8b-instruct
-spec:
-  targetPorts:
-    - number: 8000
-  selector:
-    matchLabels:
-      app: vllm-llama3-8b-instruct
-  endpointPickerRef:
-    name: vllm-llama3-8b-instruct-epp
-    kind: Service
-    port:
-      number: 9002
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: vllm-llama3-8b-instruct-epp
-  namespace: default
-spec:
-  selector:
-    app: vllm-llama3-8b-instruct-epp
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-      appProtocol: http2
-  type: ClusterIP
----
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: vllm-llama3-8b-instruct-epp
-  namespace: default
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vllm-llama3-8b-instruct-epp
-  namespace: default
-  labels:
-    app: vllm-llama3-8b-instruct-epp
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vllm-llama3-8b-instruct-epp
-  template:
-    metadata:
-      labels:
-        app: vllm-llama3-8b-instruct-epp
-    spec:
-      serviceAccountName: vllm-llama3-8b-instruct-epp
-      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
-      terminationGracePeriodSeconds: 130
-      containers:
-      - name: epp
-        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
-        imagePullPolicy: Always
-        args:
-        - --pool-name
-        - "vllm-llama3-8b-instruct"
-        - "--pool-namespace"
-        - "default"
-        - --v
-        - "4"
-        - --zap-encoder
-        - "json"
-        - --grpc-port
-        - "9002"
-        - --grpc-health-port
-        - "9003"
-        - "--config-file"
-        - "/config/default-plugins.yaml"
-        ports:
-        - containerPort: 9002
-        - containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        livenessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        volumeMounts:
-        - name: plugins-config-volume
-          mountPath: "/config"
-      volumes:
-      - name: plugins-config-volume
-        configMap:
-          name: plugins-config
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: plugins-config
-  namespace: default
-data:
-  default-plugins.yaml: |
-    apiVersion: inference.networking.x-k8s.io/v1alpha1
-    kind: EndpointPickerConfig
-    plugins:
-    - type: queue-scorer
-    - type: kv-cache-utilization-scorer
-    - type: prefix-cache-scorer
-    schedulingProfiles:
-    - name: default
-      plugins:
-      - pluginRef: queue-scorer
-      - pluginRef: kv-cache-utilization-scorer
-      - pluginRef: prefix-cache-scorer
----
-kind: Role
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: pod-read
-  namespace: default
-rules:
-- apiGroups: [ "inference.networking.x-k8s.io" ]
-  resources: [ "inferenceobjectives", "inferencepools" ]
-  verbs: [ "get", "watch", "list" ]
-- apiGroups: [ "inference.networking.k8s.io" ]
-  resources: [ "inferencepools" ]
-  verbs: [ "get", "watch", "list" ]
-- apiGroups: [ "" ]
-  resources: [ "pods" ]
-  verbs: [ "get", "watch", "list" ]
----
-kind: RoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: pod-read-binding
-  namespace: default
-subjects:
-- kind: ServiceAccount
-  name: vllm-llama3-8b-instruct-epp
-  namespace: default
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: Role
-  name: pod-read
----
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: auth-reviewer
-rules:
-- apiGroups:
-  - authentication.k8s.io
-  resources:
-  - tokenreviews
-  verbs:
-  - create
-- apiGroups:
-  - authorization.k8s.io
-  resources:
-  - subjectaccessreviews
-  verbs:
-  - create
----
-kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: auth-reviewer-binding
-subjects:
-- kind: ServiceAccount
-  name: vllm-llama3-8b-instruct-epp
-  namespace: default
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: auth-reviewer
diff --git a/hack/release-quickstart.sh b/hack/release-quickstart.sh
index 22c705184..04b79a3ef 100755
--- a/hack/release-quickstart.sh
+++ b/hack/release-quickstart.sh
@@ -74,25 +74,21 @@ sed -i.bak "s|kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-in
 # -----------------------------------------------------------------------------
 # Update image references
 # -----------------------------------------------------------------------------
-EPP="config/manifests/inferencepool-resources.yaml"
 #TODO: Put all helm values files into an array to loop over
 EPP_HELM="config/charts/inferencepool/values.yaml"
 BBR_HELM="config/charts/body-based-routing/values.yaml"
 CONFORMANCE_MANIFESTS="conformance/resources/base.yaml"
-echo "Updating ${EPP}, ${EPP_HELM}, ${BBR_HELM}, and ${CONFORMANCE_MANIFESTS} ..."
+echo "Updating ${EPP_HELM}, ${BBR_HELM}, and ${CONFORMANCE_MANIFESTS} ..."
 
 # Update the container tag.
-sed -i.bak -E "s|(us-central1-docker\.pkg\.dev/k8s-staging-images/gateway-api-inference-extension/epp:)[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$EPP"
 sed -i.bak -E "s|(tag: )[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$EPP_HELM"
 sed -i.bak -E "s|(tag: )[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$BBR_HELM"
 sed -i.bak -E "s|(us-central1-docker\.pkg\.dev/k8s-staging-images/gateway-api-inference-extension/epp:)[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$CONFORMANCE_MANIFESTS"
 
 # Update the container image pull policy.
-sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inference-extension\/epp/{n;s/Always/IfNotPresent/;}' "$EPP"
 sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inference-extension\/epp/{n;s/Always/IfNotPresent/;}' "$CONFORMANCE_MANIFESTS"
 
 # Update the container registry.
-sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EPP"
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EPP_HELM"
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$BBR_HELM"
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$CONFORMANCE_MANIFESTS"
@@ -139,8 +135,8 @@ sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io
 # -----------------------------------------------------------------------------
 # Stage the changes
 # -----------------------------------------------------------------------------
-echo "Staging $VERSION_FILE $UPDATED_CRD $README $EPP $EPP_HELM $BBR_HELM $CONFORMANCE_MANIFESTS $VLLM_GPU_DEPLOY $VLLM_CPU_DEPLOY $VLLM_SIM_DEPLOY files..."
-git add $VERSION_FILE $UPDATED_CRD $README $EPP $EPP_HELM $BBR_HELM $CONFORMANCE_MANIFESTS $VLLM_GPU_DEPLOY $VLLM_CPU_DEPLOY $VLLM_SIM_DEPLOY
+echo "Staging $VERSION_FILE $UPDATED_CRD $README $EPP_HELM $BBR_HELM $CONFORMANCE_MANIFESTS $VLLM_GPU_DEPLOY $VLLM_CPU_DEPLOY $VLLM_SIM_DEPLOY files..."
+git add $VERSION_FILE $UPDATED_CRD $README $EPP_HELM $BBR_HELM $CONFORMANCE_MANIFESTS $VLLM_GPU_DEPLOY $VLLM_CPU_DEPLOY $VLLM_SIM_DEPLOY
 
 # -----------------------------------------------------------------------------
 # Cleanup backup files and finish
diff --git a/site-src/implementations/model-servers.md b/site-src/implementations/model-servers.md
index da9968fad..ed57e1252 100644
--- a/site-src/implementations/model-servers.md
+++ b/site-src/implementations/model-servers.md
@@ -19,34 +19,29 @@ vLLM is configured as the default in the [endpoint picker extension](https://git
 
 Triton specific metric names need to be specified when starting the EPP.
 
-### Option 1: Use Helm
+Use `--set inferencePool.modelServerType=triton-tensorrt-llm` to install the `inferencepool` via helm. See the [`inferencepool` helm guide](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/config/charts/inferencepool/README.md) for more details.
 
-Use `--set inferencePool.modelServerType=triton-tensorrt-llm` to install the [`inferencepool` via helm](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/42eb5ff1c5af1275df43ac384df0ddf20da95134/config/charts/inferencepool). See the [`inferencepool` helm guide](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/42eb5ff1c5af1275df43ac384df0ddf20da95134/config/charts/inferencepool/README.md) for more details.
+Alternatively, set the metric flags explicitly by adding the following to the `flags` in the Helm chart values, which are passed as [flags to EPP](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/29ea29028496a638b162ff287c62c0087211bbe5/config/charts/inferencepool/values.yaml#L36):
 
-### Option 2: Edit EPP deployment yaml
-
- Add the following to the `args` of the [EPP deployment](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/42eb5ff1c5af1275df43ac384df0ddf20da95134/config/manifests/inferencepool-resources.yaml#L32)
-
- ```
-- --total-queued-requests-metric
-- "nv_trt_llm_request_metrics{request_type=waiting}"
-- --kv-cache-usage-percentage-metric
-- "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
-- --lora-info-metric
-- "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
+```
+- name: total-queued-requests-metric
+  value: "nv_trt_llm_request_metrics{request_type=waiting}"
+- name: kv-cache-usage-percentage-metric
+  value: "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
+- name: lora-info-metric
+  value: ""  # Set an empty metric to disable LoRA metric scraping, as LoRA metrics are not supported by Triton yet.
 ```
 
 ## SGLang
 
-### Edit EPP deployment yaml
+When deploying with the Helm chart, add the following to the `flags` in the chart values to set the [EPP flags](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/29ea29028496a638b162ff287c62c0087211bbe5/config/charts/inferencepool/values.yaml#L36):
 
- Add the following to the `args` of the [EPP deployment](https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/42eb5ff1c5af1275df43ac384df0ddf20da95134/config/manifests/inferencepool-resources.yaml#L32)
 
 ```
-- --totalQueuedRequestsMetric
-- "sglang:num_queue_reqs"
-- --kvCacheUsagePercentageMetric
-- "sglang:token_usage"
-- --lora-info-metric
-- "" # Set an empty metric to disable LoRA metric scraping as they are not supported by SGLang yet.
-```
+- name: total-queued-requests-metric
+  value: "sglang:num_queue_reqs"
+- name: kv-cache-usage-percentage-metric
+  value: "sglang:token_usage"
+- name: lora-info-metric
+  value: ""  # Set an empty metric to disable LoRA metric scraping, as LoRA metrics are not supported by SGLang yet.
+```