Adds vLLM CPU and Sim Support to Release Script (#1029)

danehans · web-flow · commit 8c0b2c584c99 · 2025-06-20T10:08:53.000-07:00
Signed-off-by: Daneyon Hansen <daneyon.hansen@solo.io>
diff --git a/.github/ISSUE_TEMPLATE/new-release.md b/.github/ISSUE_TEMPLATE/new-release.md
@@ -35,10 +35,14 @@ This document defines the process for releasing Gateway API Inference Extension.
    export RC=1
    ```
 
-4. The vLLM image tag defaults to `v0.7.2` for a release. Set the `VLLM` environment variable if a newer [tag][vllm-tag] has been published. For example:
+4. Refer to the [release-quickstart script][release-quickstart] for the default image tags used
+   by the vLLM deployment manifests. If a newer [GPU][vllm-gpu-tag], [CPU][vllm-cpu-tag], or [Simulator][vllm-sim-tag]
+   tag has been published, set the appropriate environment variable or update the script. For example:
 
    ```shell
-   export VLLM=0.7.3
+   export VLLM_GPU=0.9.2
+   export VLLM_CPU=0.9.3
+   export VLLM_SIM=0.1.2
    ```
 
 ## Release Process
@@ -159,4 +163,7 @@ Use the following steps to announce the release.
 [k8s.io]: https://github.com/kubernetes/k8s.io
 [yaml]: https://github.com/kubernetes/k8s.io/blob/main/registry.k8s.io/images/k8s-staging-gateway-api-inference-extension/images.yaml
 [issue]: https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/new/choose
-[vllm-tag]: https://hub.docker.com/r/vllm/vllm-openai/tags
+[vllm-gpu-tag]: https://hub.docker.com/r/vllm/vllm-openai/tags
+[vllm-cpu-tag]: https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
+[vllm-sim-tag]: https://github.com/llm-d/llm-d-inference-sim/pkgs/container/llm-d-inference-sim
+[release-quickstart]: https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/hack/release-quickstart.sh
diff --git a/hack/release-quickstart.sh b/hack/release-quickstart.sh
@@ -29,11 +29,18 @@ else
   RELEASE_TAG="v${MAJOR}.${MINOR}.0-rc.${RC}"
 fi
 
-# vLLM image version (default to 0.7.2 if not defined)
-VLLM="${VLLM:-0.7.2}"
+# The vLLM image versions
+# The GPU image is from https://hub.docker.com/r/vllm/vllm-openai/tags
+VLLM_GPU="${VLLM_GPU:-0.9.1}"
+# The CPU image is from https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
+VLLM_CPU="${VLLM_CPU:-0.9.1}"
+# The sim image is from https://github.com/llm-d/llm-d-inference-sim/pkgs/container/llm-d-inference-sim
+VLLM_SIM="${VLLM_SIM:-0.1.1}"
 
 echo "Using release tag: ${RELEASE_TAG}"
-echo "Using vLLM image version: ${VLLM}"
+echo "Using vLLM GPU image version: ${VLLM_GPU}"
+echo "Using vLLM CPU image version: ${VLLM_CPU}"
+echo "Using vLLM Simulator image version: ${VLLM_SIM}"
 
 # -----------------------------------------------------------------------------
 # Update pkg/README.md
@@ -64,30 +71,48 @@ sed -i.bak -E "s|(tag: )[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$EPP_HELM"
 sed -i.bak -E "s|(tag: )[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$BBR_HELM"
 
 # Update the container image pull policy.
-sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inference-extension\/epp/ { n; s/Always/IfNotPresent/ }' "$EPP"
+sed -i.bak '/us-central1-docker.pkg.dev\/k8s-staging-images\/gateway-api-inference-extension\/epp/{n;s/Always/IfNotPresent/;}' "$EPP"
 
 # Update the container registry.
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EPP"
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$EPP_HELM"
 sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io|g" "$BBR_HELM"
 
 # -----------------------------------------------------------------------------
-# Update config/manifests/vllm/gpu-deployment.yaml
+# Update vLLM deployment manifests
 # -----------------------------------------------------------------------------
-VLLM_DEPLOY="config/manifests/vllm/gpu-deployment.yaml"
-echo "Updating ${VLLM_DEPLOY} ..."
+VLLM_GPU_DEPLOY="config/manifests/vllm/gpu-deployment.yaml"
+echo "Updating ${VLLM_GPU_DEPLOY} ..."
 
-# Update the vLLM image version
-sed -i.bak -E "s|(vllm/vllm-openai:)[^\"[:space:]]+|\1v${VLLM}|g" "$VLLM_DEPLOY"
+# Update the vLLM GPU image version
+sed -i.bak -E "s|(vllm/vllm-openai:)[^\"[:space:]]+|\1v${VLLM_GPU}|g" "$VLLM_GPU_DEPLOY"
 
 # Also change the imagePullPolicy from Always to IfNotPresent on lines containing the vLLM image.
-sed -i.bak '/vllm\/vllm-openai/ { n; s/Always/IfNotPresent/ }' "$VLLM_DEPLOY"
+sed -i.bak '/vllm\/vllm-openai/{n;s/Always/IfNotPresent/;}' "$VLLM_GPU_DEPLOY"
+
+VLLM_CPU_DEPLOY="config/manifests/vllm/cpu-deployment.yaml"
+echo "Updating ${VLLM_CPU_DEPLOY} ..."
+
+# Update the vLLM CPU image version
+sed -i.bak -E "s|(q9t5s3a7/vllm-cpu-release-repo:)[^\"[:space:]]+|\1v${VLLM_CPU}|g" "$VLLM_CPU_DEPLOY"
+
+# Also change the imagePullPolicy from Always to IfNotPresent on the line following the vLLM CPU image.
+sed -i.bak '/q9t5s3a7\/vllm-cpu-release-repo/{n;s/Always/IfNotPresent/;}' "$VLLM_CPU_DEPLOY"
+
+VLLM_SIM_DEPLOY="config/manifests/vllm/sim-deployment.yaml"
+echo "Updating ${VLLM_SIM_DEPLOY} ..."
+
+# Update the vLLM Simulator image version
+sed -i.bak -E "s|(llm-d/llm-d-inference-sim:)[^\"[:space:]]+|\1v${VLLM_SIM}|g" "$VLLM_SIM_DEPLOY"
+
+# Also change the imagePullPolicy from Always to IfNotPresent on the line following the vLLM Simulator image.
+sed -i.bak '/llm-d\/llm-d-inference-sim/{n;s/Always/IfNotPresent/;}' "$VLLM_SIM_DEPLOY"
 
 # -----------------------------------------------------------------------------
 # Stage the changes
 # -----------------------------------------------------------------------------
-echo "Staging $README $EPP $EPP_HELM $BBR_HELM $VLLM_DEPLOY files..."
-git add $README $EPP $EPP_HELM $BBR_HELM $VLLM_DEPLOY
+echo "Staging $README $EPP $EPP_HELM $BBR_HELM $VLLM_GPU_DEPLOY $VLLM_CPU_DEPLOY $VLLM_SIM_DEPLOY files..."
+git add $README $EPP $EPP_HELM $BBR_HELM $VLLM_GPU_DEPLOY $VLLM_CPU_DEPLOY $VLLM_SIM_DEPLOY
 
 # -----------------------------------------------------------------------------
 # Cleanup backup files and finish