Commit 10ca1d4

Merge branch 'neuralmagic:main' into main
2 parents e3be0ef + 023c8dd commit 10ca1d4

File tree

.github/workflows/development.yml
deploy/Containerfile
deploy/entrypoint.sh
src/guidellm/backend/backend.py
src/guidellm/backend/openai.py
tests/unit/mock_backend.py

6 files changed: +115 -2 lines changed


.github/workflows/development.yml

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 name: Development
 
 on:
-  pull_request:
+  pull_request_target:
     types: [opened, synchronize, reopened]
 
 jobs:

deploy/Containerfile

Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@
+ARG PYTHON=3.13
+
+# Use a multi-stage build to create a lightweight production image
+FROM docker.io/python:${PYTHON}-slim as builder
+
+# Copy repository files
+COPY / /src
+
+# Create a venv and install guidellm
+RUN python3 -m venv /opt/guidellm \
+    && /opt/guidellm/bin/pip install --no-cache-dir /src
+
+# Copy entrypoint script into the venv bin directory
+RUN install -m0755 /src/deploy/entrypoint.sh /opt/guidellm/bin/entrypoint.sh
+
+# Prod image
+FROM docker.io/python:${PYTHON}-slim
+
+# Copy the virtual environment from the builder stage
+COPY --from=builder /opt/guidellm /opt/guidellm
+
+# Add guidellm bin to PATH
+ENV PATH="/opt/guidellm/bin:$PATH"
+
+# Create a non-root user
+RUN useradd -md /results guidellm
+
+# Switch to non-root user
+USER guidellm
+
+# Set working directory
+WORKDIR /results
+
+# Metadata
+LABEL org.opencontainers.image.source="https://github.com/neuralmagic/guidellm" \
+      org.opencontainers.image.description="GuideLLM Performance Benchmarking Container"
+
+# Set the environment variable for the benchmark script
+# TODO: Replace with scenario environment variables
+ENV GUIDELLM_TARGET="http://localhost:8000" \
+    GUIDELLM_MODEL="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16" \
+    GUIDELLM_RATE_TYPE="sweep" \
+    GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
+    GUIDELLM_MAX_REQUESTS="100" \
+    GUIDELLM_MAX_SECONDS="" \
+    GUIDELLM_OUTPUT_PATH="/results/results.json"
+
+ENTRYPOINT [ "/opt/guidellm/bin/entrypoint.sh" ]
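
For reference, a plausible build-and-run sequence for this image (the guidellm:latest tag and host paths are placeholders, not part of this commit; --network host assumes a Linux host with the target server running locally):

    # Build the image from the repository root
    docker build -f deploy/Containerfile -t guidellm:latest .

    # Run a benchmark, overriding two defaults; results are written to
    # /results inside the container (the guidellm user's home directory)
    docker run --rm --network host \
      -e GUIDELLM_TARGET="http://localhost:8000" \
      -e GUIDELLM_MAX_REQUESTS="10" \
      -v "$PWD/results:/results" \
      guidellm:latest

    # Positional arguments bypass the environment handling entirely and
    # are passed straight through to the guidellm CLI
    docker run --rm guidellm:latest benchmark --help

Note that the mounted host directory must be writable by the container's non-root guidellm user for results.json to persist.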

deploy/entrypoint.sh

Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Path to the guidellm binary
+guidellm_bin="/opt/guidellm/bin/guidellm"
+
+# If we receive any arguments, switch to the guidellm command
+if [ $# -gt 0 ]; then
+    echo "Running command: guidellm $*"
+    exec $guidellm_bin "$@"
+fi
+
+# Get a list of environment variables that start with GUIDELLM_
+args="$(printenv | cut -d= -f1 | grep -E '^GUIDELLM_')"
+
+# NOTE: Bash array + exec prevent shell escape issues
+CMD=("${guidellm_bin}" "benchmark")
+
+# Parse environment variables for the benchmark command
+for var in $args; do
+    # Remove GUIDELLM_ prefix
+    arg_name="${var#GUIDELLM_}"
+
+    # If there is an extra underscore at the
+    # start then this is a config variable
+    if [ "${arg_name:0:1}" == "_" ]; then
+        continue
+    fi
+
+    # Convert to lowercase
+    arg_name="${arg_name,,}"
+    # Replace underscores with dashes
+    arg_name="${arg_name//_/-}"
+
+    # Add the argument to the command array if set
+    if [ -n "${!var}" ]; then
+        CMD+=("--${arg_name}" "${!var}")
+    fi
+done

+# Execute the command
+echo "Running command: ${CMD[*]}"
+exec "${CMD[@]}"

src/guidellm/backend/backend.py

Lines changed: 10 additions & 0 deletions

@@ -110,6 +110,14 @@ def info(self) -> dict[str, Any]:
         """
         ...
 
+    @abstractmethod
+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        """
+        ...
+
     async def validate(self):
         """
         Handle final setup and validate the backend is ready for use.
@@ -126,6 +134,8 @@ async def validate(self):
         ):  # type: ignore[attr-defined]
             pass
 
+        await self.reset()
+
     @abstractmethod
     async def check_setup(self):
         """

src/guidellm/backend/openai.py

Lines changed: 10 additions & 1 deletion

@@ -167,6 +167,15 @@ def info(self) -> dict[str, Any]:
             "chat_completions_path": CHAT_COMPLETIONS_PATH,
         }
 
+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        For this backend, it closes the async client if it exists.
+        """
+        if self._async_client is not None:
+            await self._async_client.aclose()
+
     async def check_setup(self):
         """
         Check if the backend is setup correctly and can be used for requests.
@@ -369,7 +378,7 @@ def _get_async_client(self) -> httpx.AsyncClient:
 
         :return: The async HTTP client.
         """
-        if self._async_client is None:
+        if self._async_client is None or self._async_client.is_closed:
             client = httpx.AsyncClient(
                 http2=self.http2,
                 timeout=self.timeout,
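
Together, reset() and the is_closed check above give the client a close-then-recreate lifecycle that can be reproduced with plain httpx (a standalone sketch, not guidellm code):

    import asyncio
    import httpx

    async def main() -> None:
        client = httpx.AsyncClient()

        # reset(): close the client, as the backend now does
        await client.aclose()
        assert client.is_closed

        # _get_async_client(): the new is_closed check makes the getter
        # build a fresh client instead of reusing the closed one
        if client is None or client.is_closed:
            client = httpx.AsyncClient()
        assert not client.is_closed
        await client.aclose()

    asyncio.run(main())

Without the is_closed check, the first request after reset() would fail on the closed client; with it, the getter transparently builds a fresh one.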

tests/unit/mock_backend.py

Lines changed: 3 additions & 0 deletions

@@ -41,6 +41,9 @@ def model(self) -> Optional[str]:
     def info(self) -> dict[str, Any]:
         return {}
 
+    async def reset(self) -> None:
+        pass
+
     async def prepare_multiprocessing(self):
         pass