Skip to content

Commit 2f5bfbd

Browse files
kthuioandreeva-nv
andauthored
perf: Upgrade vLLM version to 0.6.3.post1 (#76)
Co-authored-by: Olga Andreeva <[email protected]>
1 parent 0b9c8e2 commit 2f5bfbd

File tree

6 files changed

+354
-382
lines changed

6 files changed

+354
-382
lines changed

ci/L0_backend_vllm/metrics_test/test.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ run_test() {
7474
RET=1
7575
fi
7676
fi
77+
7778
set -e
7879

80+
# TODO: Non-graceful shutdown when metrics are enabled.
7981
kill $SERVER_PID
8082
wait $SERVER_PID
8183
}

ci/L0_backend_vllm/metrics_test/vllm_metrics_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ def test_vllm_metrics(self):
170170
total_prompts,
171171
)
172172

173+
# TODO: Revisit this test due to the removal of best_of
173174
def test_custom_sampling_params(self):
174175
# Adding sampling parameters for testing metrics.
175176
# Definitions can be found here https://docs.vllm.ai/en/latest/dev/sampling_params.html
@@ -191,6 +192,7 @@ def test_custom_sampling_params(self):
191192
total_prompts = len(self.prompts)
192193

193194
# vllm:request_params_best_of
195+
"""
194196
self.assertEqual(
195197
metrics_dict["vllm:request_params_best_of_count"], total_prompts
196198
)
@@ -200,9 +202,10 @@ def test_custom_sampling_params(self):
200202
self.assertEqual(
201203
metrics_dict["vllm:request_params_best_of_bucket"], total_prompts
202204
)
205+
"""
203206
# vllm:request_params_n
204207
self.assertEqual(metrics_dict["vllm:request_params_n_count"], total_prompts)
205-
self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
208+
# self.assertEqual(metrics_dict["vllm:request_params_n_sum"], n * total_prompts)
206209
self.assertEqual(metrics_dict["vllm:request_params_n_bucket"], total_prompts)
207210

208211
def test_vllm_metrics_disabled(self):

ci/L0_check_health_vllm/mock_async_llm_engine.py

Lines changed: 0 additions & 36 deletions
This file was deleted.

ci/L0_check_health_vllm/test.sh

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,24 @@ function enable_health_check {
4747
echo -e "}" >> models/vllm_opt/config.pbtxt
4848
}
4949

50+
VLLM_INSTALL_PATH="/usr/local/lib/python3.12/dist-packages/vllm"
51+
5052
function mock_vllm_async_llm_engine {
51-
mv /opt/tritonserver/backends/vllm/model.py /opt/tritonserver/backends/vllm/.model.py.backup
52-
cp /opt/tritonserver/backends/vllm/.model.py.backup /opt/tritonserver/backends/vllm/model.py
53-
sed -i 's/from vllm.engine.async_llm_engine import AsyncLLMEngine/from mock_async_llm_engine import mock_AsyncLLMEngine as AsyncLLMEngine/' /opt/tritonserver/backends/vllm/model.py
54-
cp mock_async_llm_engine.py /opt/tritonserver/backends/vllm
53+
# backup original file
54+
mv $VLLM_INSTALL_PATH/engine/multiprocessing/client.py $VLLM_INSTALL_PATH/engine/multiprocessing/client.py.backup
55+
cp $VLLM_INSTALL_PATH/engine/multiprocessing/client.py.backup $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
56+
# overwrite the original check_health method
57+
echo -e "" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
58+
echo -e " async def check_health(self, check_count=[0]):" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
59+
echo -e " check_count[0] += 1" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
60+
echo -e " if check_count[0] > 1:" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
61+
echo -e " raise RuntimeError(\"Simulated vLLM check_health() failure\")" >> $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
5562
}
5663

5764
function unmock_vllm_async_llm_engine {
58-
rm -f /opt/tritonserver/backends/vllm/mock_async_llm_engine.py /opt/tritonserver/backends/vllm/model.py
59-
mv /opt/tritonserver/backends/vllm/.model.py.backup /opt/tritonserver/backends/vllm/model.py
65+
# restore from backup
66+
rm -f $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
67+
mv $VLLM_INSTALL_PATH/engine/multiprocessing/client.py.backup $VLLM_INSTALL_PATH/engine/multiprocessing/client.py
6068
}
6169

6270
function test_check_health {

0 commit comments

Comments
 (0)