Skip to content

Commit b41f716

Browse files
authored
vLLM adjustments (#91)
1 parent 3564c3c commit b41f716

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

ci/L0_backend_vllm/accuracy_test/accuracy_test.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ async def generate_python_vllm_output(
8282
return python_vllm_output
8383

8484

85-
def prepare_vllm_baseline_outputs(
85+
async def prepare_vllm_baseline_outputs(
8686
export_file="vllm_baseline_output.pkl", prompts=PROMPTS, guided_generation=None
8787
):
8888
"""
@@ -93,13 +93,12 @@ def prepare_vllm_baseline_outputs(
9393
llm_engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(**VLLM_ENGINE_CONFIG))
9494
python_vllm_output = []
9595
for i in range(len(prompts)):
96-
python_vllm_output.extend(
97-
asyncio.run(
98-
generate_python_vllm_output(
99-
prompts[i], llm_engine, guided_generation=guided_generation
100-
)
101-
)
96+
output = await generate_python_vllm_output(
97+
prompts[i], llm_engine, guided_generation=guided_generation
10298
)
99+
if output:
100+
python_vllm_output.extend(output)
101+
103102
with open(export_file, "wb") as f:
104103
pickle.dump(python_vllm_output, f)
105104

@@ -240,7 +239,7 @@ def tearDown(self):
240239
)
241240
FLAGS = parser.parse_args()
242241
if FLAGS.generate_baseline:
243-
prepare_vllm_baseline_outputs()
242+
asyncio.run(prepare_vllm_baseline_outputs())
244243
exit(0)
245244

246245
if FLAGS.generate_guided_baseline:
@@ -249,10 +248,12 @@ def tearDown(self):
249248
"backend": "outlines",
250249
}
251250
guided_generation = GuidedDecodingParams(**guided_decoding_params)
252-
prepare_vllm_baseline_outputs(
253-
export_file="vllm_guided_baseline_output.pkl",
254-
prompts=GUIDED_PROMPTS,
255-
guided_generation=guided_generation,
251+
asyncio.run(
252+
prepare_vllm_baseline_outputs(
253+
export_file="vllm_guided_baseline_output.pkl",
254+
prompts=GUIDED_PROMPTS,
255+
guided_generation=guided_generation,
256+
)
256257
)
257258
exit(0)
258259

ci/L0_backend_vllm/accuracy_test/test.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,17 @@ RET=0
4848
set +e
4949
# Need to generate baseline first, since running 2 vLLM engines causes
5050
# memory issues: https://github.com/vllm-project/vllm/issues/2248
51+
export VLLM_USE_V1=0
52+
export VLLM_WORKER_MULTIPROC_METHOD=spawn
5153
python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
5254
wait $BASELINE_PID
5355

5456
python3 $CLIENT_PY --generate-guided-baseline > $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
5557
wait $BASELINE_PID
5658

59+
unset VLLM_USE_V1
60+
unset VLLM_WORKER_MULTIPROC_METHOD
61+
5762
set -e
5863

5964
run_server
@@ -82,6 +87,13 @@ set -e
8287

8388
kill $SERVER_PID
8489
wait $SERVER_PID
90+
91+
# Check that warning about V1 Engine appears in log - this warning is expected
92+
if ! grep -q "Engine in background thread is experimental on VLLM_USE_V1=1. Falling back to V0 Engine." $SERVER_LOG; then
93+
echo -e "\n***\n*** ERROR: Expected warning about vLLM falling back to V0 Engine not found in logs.\n***"
94+
RET=1
95+
fi
96+
8597
rm -rf models/
8698

8799
if [ $RET -eq 1 ]; then

0 commit comments

Comments (0)