Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit 9543d0b

Browse files
dtrifiro authored and z103cb committed
TGISStatLogger: fix stats usage
1 parent 1cc8906 commit 9543d0b

File tree

2 files changed

+2
-52
lines changed

2 files changed

+2
-52
lines changed

Dockerfile.ubi

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -229,58 +229,6 @@ WORKDIR /usr/src/flash-attention-v2
229229
RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
230230
--no-build-isolation --no-deps --no-cache-dir
231231

232-
233-
## Test ########################################################################
234-
FROM dev AS test
235-
236-
WORKDIR /vllm-workspace
237-
# ADD is used to preserve directory structure
238-
# NB: Could leak secrets from local context, the test image should not be pushed
239-
# to a registry
240-
ADD . /vllm-workspace/
241-
# copy pytorch extensions separately to avoid having to rebuild
242-
# when python code changes
243-
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
244-
# Install flash attention (from pre-built wheel)
245-
RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
246-
pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
247-
# ignore build dependencies installation because we are using pre-complied extensions
248-
RUN rm pyproject.toml
249-
RUN --mount=type=cache,target=/root/.cache/pip \
250-
VLLM_USE_PRECOMPILED=1 pip install . --verbose
251-
252-
253-
## Proto Compilation ###########################################################
254-
FROM python-base AS gen-protos
255-
256-
RUN microdnf install -y \
257-
make \
258-
findutils \
259-
&& microdnf clean all
260-
261-
RUN --mount=type=cache,target=/root/.cache/pip \
262-
--mount=type=bind,source=Makefile,target=Makefile \
263-
--mount=type=bind,source=proto,target=proto \
264-
make gen-protos
265-
266-
## vLLM Library Files ##########################################################
267-
# Little extra stage to gather files and manage permissions on them without any
268-
# duplication in the release layer due to permission changes
269-
FROM base AS vllm
270-
271-
WORKDIR /vllm-staging
272-
# COPY files from various places into a staging directory
273-
COPY vllm vllm
274-
COPY --from=build /workspace/vllm/*.so vllm/
275-
COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb
276-
277-
# custom COPY command to use umask to control permissions and grant permissions
278-
# to the group
279-
RUN umask 002 \
280-
&& cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
281-
# not strictly needed, but .so files typically have executable bits
282-
&& chmod +x /workspace/vllm/*.so
283-
284232
## Release #####################################################################
285233
# Note from the non-UBI Dockerfile:
286234
# We used base cuda image because pytorch installs its own cuda libraries.

vllm/tgis_utils/metrics.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ def log(self, stats: Stats) -> None:
118118
# Then log TGIS specific ones
119119
self.tgi_queue_size.set(stats.num_waiting_sys + stats.num_swapped_sys)
120120
self.tgi_batch_current_size.set(stats.num_running_sys)
121+
self.tgi_queue_size.set(stats.num_waiting_sys + stats.num_swapped_sys)
122+
self.tgi_batch_current_size.set(stats.num_running_sys)
121123

122124
for ttft in stats.time_to_first_tokens_iter:
123125
self.tgi_batch_inference_duration.labels(

0 commit comments

Comments (0)