Commit 10ca1d4

Merge branch 'neuralmagic:main' into main
2 parents e3be0ef + 023c8dd commit 10ca1d4

File tree

.github/workflows/development.yml
deploy/Containerfile
deploy/entrypoint.sh
src/guidellm/backend/backend.py
src/guidellm/backend/openai.py
tests/unit/mock_backend.py

6 files changed: +115 -2 lines changed


.github/workflows/development.yml

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 name: Development
 
 on:
-  pull_request:
+  pull_request_target:
     types: [opened, synchronize, reopened]
 
 jobs:

deploy/Containerfile

Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@
+ARG PYTHON=3.13
+
+# Use a multi-stage build to create a lightweight production image
+FROM docker.io/python:${PYTHON}-slim as builder
+
+# Copy repository files
+COPY / /src
+
+# Create a venv and install guidellm
+RUN python3 -m venv /opt/guidellm \
+    && /opt/guidellm/bin/pip install --no-cache-dir /src
+
+# Copy entrypoint script into the venv bin directory
+RUN install -m0755 /src/deploy/entrypoint.sh /opt/guidellm/bin/entrypoint.sh
+
+# Prod image
+FROM docker.io/python:${PYTHON}-slim
+
+# Copy the virtual environment from the builder stage
+COPY --from=builder /opt/guidellm /opt/guidellm
+
+# Add guidellm bin to PATH
+ENV PATH="/opt/guidellm/bin:$PATH"
+
+# Create a non-root user
+RUN useradd -md /results guidellm
+
+# Switch to non-root user
+USER guidellm
+
+# Set working directory
+WORKDIR /results
+
+# Metadata
+LABEL org.opencontainers.image.source="https://github.com/neuralmagic/guidellm" \
+      org.opencontainers.image.description="GuideLLM Performance Benchmarking Container"
+
+# Set the environment variable for the benchmark script
+# TODO: Replace with scenario environment variables
+ENV GUIDELLM_TARGET="http://localhost:8000" \
+    GUIDELLM_MODEL="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16" \
+    GUIDELLM_RATE_TYPE="sweep" \
+    GUIDELLM_DATA="prompt_tokens=256,output_tokens=128" \
+    GUIDELLM_MAX_REQUESTS="100" \
+    GUIDELLM_MAX_SECONDS="" \
+    GUIDELLM_OUTPUT_PATH="/results/results.json"
+
+ENTRYPOINT [ "/opt/guidellm/bin/entrypoint.sh" ]
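
For reference, a plausible build-and-run sequence for this image (the guidellm:latest tag and host paths are placeholders, not part of this commit; --network host assumes a Linux host with the target server running locally):

    # Build the image from the repository root
    docker build -f deploy/Containerfile -t guidellm:latest .

    # Run a benchmark, overriding two defaults; results are written to
    # /results inside the container (the guidellm user's home directory)
    docker run --rm --network host \
      -e GUIDELLM_TARGET="http://localhost:8000" \
      -e GUIDELLM_MAX_REQUESTS="10" \
      -v "$PWD/results:/results" \
      guidellm:latest

    # Positional arguments bypass the environment handling entirely and
    # are passed straight through to the guidellm CLI
    docker run --rm guidellm:latest benchmark --help

Note that the mounted host directory must be writable by the container's non-root guidellm user for results.json to persist.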

deploy/entrypoint.sh

Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Path to the guidellm binary
+guidellm_bin="/opt/guidellm/bin/guidellm"
+
+# If we receive any arguments, switch to the guidellm command
+if [ $# -gt 0 ]; then
+    echo "Running command: guidellm $*"
+    exec $guidellm_bin "$@"
+fi
+
+# Get a list of environment variables that start with GUIDELLM_
+args="$(printenv | cut -d= -f1 | grep -E '^GUIDELLM_')"
+
+# NOTE: Bash array + exec prevent shell escape issues
+CMD=("${guidellm_bin}" "benchmark")
+
+# Parse environment variables for the benchmark command
+for var in $args; do
+    # Remove GUIDELLM_ prefix
+    arg_name="${var#GUIDELLM_}"
+
+    # If there is an extra underscore at the
+    # start then this is a config variable
+    if [ "${arg_name:0:1}" == "_" ]; then
+        continue
+    fi
+
+    # Convert to lowercase
+    arg_name="${arg_name,,}"
+    # Replace underscores with dashes
+    arg_name="${arg_name//_/-}"
+
+    # Add the argument to the command array if set
+    if [ -n "${!var}" ]; then
+        CMD+=("--${arg_name}" "${!var}")
+    fi
+done

+# Execute the command
+echo "Running command: ${CMD[*]}"
+exec "${CMD[@]}"

src/guidellm/backend/backend.py

Lines changed: 10 additions & 0 deletions

@@ -110,6 +110,14 @@ def info(self) -> dict[str, Any]:
         """
         ...
 
+    @abstractmethod
+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        """
+        ...
+
     async def validate(self):
         """
         Handle final setup and validate the backend is ready for use.
@@ -126,6 +134,8 @@ async def validate(self):
         ):  # type: ignore[attr-defined]
             pass
 
+        await self.reset()
+
     @abstractmethod
     async def check_setup(self):
         """

src/guidellm/backend/openai.py

Lines changed: 10 additions & 1 deletion

@@ -167,6 +167,15 @@ def info(self) -> dict[str, Any]:
             "chat_completions_path": CHAT_COMPLETIONS_PATH,
         }
 
+    async def reset(self) -> None:
+        """
+        Reset the connection object. This is useful for backends that
+        reuse connections or have state that needs to be cleared.
+        For this backend, it closes the async client if it exists.
+        """
+        if self._async_client is not None:
+            await self._async_client.aclose()
+
     async def check_setup(self):
         """
         Check if the backend is setup correctly and can be used for requests.
@@ -369,7 +378,7 @@ def _get_async_client(self) -> httpx.AsyncClient:
 
         :return: The async HTTP client.
         """
-        if self._async_client is None:
+        if self._async_client is None or self._async_client.is_closed:
             client = httpx.AsyncClient(
                 http2=self.http2,
                 timeout=self.timeout,
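
Together, reset() and the is_closed check above give the client a close-then-recreate lifecycle that can be reproduced with plain httpx (a standalone sketch, not guidellm code):

    import asyncio
    import httpx

    async def main() -> None:
        client = httpx.AsyncClient()

        # reset(): close the client, as the backend now does
        await client.aclose()
        assert client.is_closed

        # _get_async_client(): the new is_closed check makes the getter
        # build a fresh client instead of reusing the closed one
        if client is None or client.is_closed:
            client = httpx.AsyncClient()
        assert not client.is_closed
        await client.aclose()

    asyncio.run(main())

Without the is_closed check, the first request after reset() would fail on the closed client; with it, the getter transparently builds a fresh one.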

tests/unit/mock_backend.py

Lines changed: 3 additions & 0 deletions

@@ -41,6 +41,9 @@ def model(self) -> Optional[str]:
     def info(self) -> dict[str, Any]:
         return {}
 
+    async def reset(self) -> None:
+        pass
+
     async def prepare_multiprocessing(self):
         pass