@@ -47,16 +47,24 @@ function enable_health_check {
4747 echo -e " }" >> models/vllm_opt/config.pbtxt
4848}
4949
# Root directory of the installed vLLM Python package. The mock/unmock helpers
# below patch engine/multiprocessing/client.py underneath this directory.
# The value must not carry leading or trailing whitespace: it is concatenated
# directly with "/engine/..." to form file paths.
VLLM_INSTALL_PATH="/usr/local/lib/python3.12/dist-packages/vllm"
51+
# Patch the installed vLLM engine client so that check_health() starts failing
# after its first call.
#
# The pristine client.py is moved to client.py.backup, copied back in place,
# and a replacement check_health() definition is appended to the copy.
#
# Globals:
#   VLLM_INSTALL_PATH (read) - root of the installed vLLM package
# Returns:
#   0 on success, non-zero if the path is unset or any file operation fails.
function mock_vllm_async_llm_engine {
    if [[ -z "${VLLM_INSTALL_PATH:-}" ]]; then
        echo "VLLM_INSTALL_PATH is not set" >&2
        return 1
    fi
    local client_py="${VLLM_INSTALL_PATH}/engine/multiprocessing/client.py"

    # backup original file, then work on a copy so the backup stays untouched
    mv -- "${client_py}" "${client_py}.backup" || return
    cp -- "${client_py}.backup" "${client_py}" || return

    # overwrite the original check_health method
    # The leading blank line guarantees the new definition starts on its own
    # line even when client.py has no trailing newline. The 4-space indent
    # makes the definition a class member -- presumably of the last class in
    # client.py; confirm if the vLLM layout changes.
    # check_count=[0] is a deliberately shared mutable default: Python
    # evaluates it once, so the counter persists across calls and only the
    # first health check succeeds.
    cat >> "${client_py}" <<'EOF'

    async def check_health(self, check_count=[0]):
        check_count[0] += 1
        if check_count[0] > 1:
            raise RuntimeError("Simulated vLLM check_health() failure")
EOF
}
5663
# Undo mock_vllm_async_llm_engine: put the pristine client.py back in place.
#
# The backup is checked first so that a missing backup never results in
# client.py being removed with nothing to replace it.
#
# Globals:
#   VLLM_INSTALL_PATH (read) - root of the installed vLLM package
# Returns:
#   0 on success, non-zero if the path is unset, the backup is missing, or the
#   restore fails.
function unmock_vllm_async_llm_engine {
    if [[ -z "${VLLM_INSTALL_PATH:-}" ]]; then
        echo "VLLM_INSTALL_PATH is not set" >&2
        return 1
    fi
    local client_py="${VLLM_INSTALL_PATH}/engine/multiprocessing/client.py"

    if [[ ! -f "${client_py}.backup" ]]; then
        echo "No backup found at ${client_py}.backup; nothing to restore" >&2
        return 1
    fi

    # restore from backup; mv -f replaces the patched file in a single step
    mv -f -- "${client_py}.backup" "${client_py}"
}
6169
6270function test_check_health {
0 commit comments