Drop Entrypoint and Improve Containerfile (#250)

sjmonson · web-flow · commit ef04f6045273 · 2025-07-30T18:09:41.000-04:00
* Drop the container entrypoint script as GuideLLM has had native support for its features since #99
* Make containerfile more rebuild friendly based on #213
* Drop the ENV default scenario as it is confusing to users setting CLI args

Closes: #213

---------

Signed-off-by: Samuel Monson <smonson@redhat.com>
diff --git a/README.md b/README.md
@@ -52,6 +52,25 @@ pip install git+https://github.com/vllm-project/guidellm.git
 
 For detailed installation instructions and requirements, see the [Installation Guide](https://github.com/vllm-project/guidellm/blob/main/docs/install.md).
 
+### With Podman / Docker
+
+Alternatively, we publish container images at [ghcr.io/vllm-project/guidellm](https://github.com/vllm-project/guidellm/pkgs/container/guidellm). Running a container is (by default) equivalent to `guidellm benchmark run`:
+
+```bash
+podman run \
+  --rm -it \
+  -v "./results:/results:rw" \
+  -e GUIDELLM_TARGET=http://localhost:8000 \
+  -e GUIDELLM_RATE_TYPE=sweep \
+  -e GUIDELLM_MAX_SECONDS=30 \
+  -e GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
+  ghcr.io/vllm-project/guidellm:latest
+```
+
+> [!TIP] CLI options can also be specified as ENV variables (e.g. `--rate-type sweep` -> `GUIDELLM_RATE_TYPE=sweep`). If both are specified, the CLI option overrides the ENV.
+
+Replace `latest` with `stable` for the newest tagged release or set a specific release if desired.
+
 ### Quick Start
 
 #### 1. Start an OpenAI Compatible Server (vLLM)
diff --git a/deploy/Containerfile b/deploy/Containerfile
@@ -1,26 +1,26 @@
-ARG PYTHON=3.13
+ARG BASE_IMAGE=docker.io/python:3.13-slim
 
 # Use a multi-stage build to create a lightweight production image
-FROM docker.io/python:${PYTHON}-slim as builder
+FROM $BASE_IMAGE as builder
+
+# Ensure files are installed as root
+USER root
 
 # Copy repository files
-COPY / /src
+COPY / /opt/app-root/src
 
 # Create a venv and install guidellm
-RUN python3 -m venv /opt/guidellm \
-    && /opt/guidellm/bin/pip install --no-cache-dir /src
-
-# Copy entrypoint script into the venv bin directory
-RUN install -m0755 /src/deploy/entrypoint.sh /opt/guidellm/bin/entrypoint.sh
+RUN python3 -m venv /opt/app-root/guidellm \
+    && /opt/app-root/guidellm/bin/pip install --no-cache-dir /opt/app-root/src
 
 # Prod image
-FROM docker.io/python:${PYTHON}-slim
+FROM $BASE_IMAGE
 
 # Copy the virtual environment from the builder stage
-COPY --from=builder /opt/guidellm /opt/guidellm
+COPY --from=builder /opt/app-root/guidellm /opt/app-root/guidellm
 
 # Add guidellm bin to PATH
-ENV PATH="/opt/guidellm/bin:$PATH"
+ENV PATH="/opt/app-root/guidellm/bin:$PATH"
 
 # Create a non-root user
 RUN useradd -md /results guidellm
@@ -35,14 +35,8 @@ WORKDIR /results
 LABEL org.opencontainers.image.source="https://github.com/vllm-project/guidellm" \
       org.opencontainers.image.description="GuideLLM Performance Benchmarking Container"
 
-# Set the environment variable for the benchmark script
-# TODO: Replace with scenario environment variables
-ENV GUIDELLM_TARGET="http://localhost:8000" \
-    GUIDELLM_MODEL="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16" \
-    GUIDELLM_RATE_TYPE="sweep" \
-    GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
-    GUIDELLM_MAX_REQUESTS="100" \
-    GUIDELLM_MAX_SECONDS="" \
-    GUIDELLM_OUTPUT_PATH="/results/results.json"
-
-ENTRYPOINT [ "/opt/guidellm/bin/entrypoint.sh" ]
+# Argument defaults can be set with GUIDELLM_<ARG>
+ENV GUIDELLM_OUTPUT_PATH="/results/benchmarks.json"
+
+ENTRYPOINT [ "/opt/app-root/guidellm/bin/guidellm" ]
+CMD [ "benchmark", "run" ]
diff --git a/deploy/entrypoint.sh b/deploy/entrypoint.sh