Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions ci/L0_backend_vllm/accuracy_test/accuracy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ async def generate_python_vllm_output(
return python_vllm_output


def prepare_vllm_baseline_outputs(
async def prepare_vllm_baseline_outputs(
export_file="vllm_baseline_output.pkl", prompts=PROMPTS, guided_generation=None
):
"""
Expand All @@ -93,13 +93,12 @@ def prepare_vllm_baseline_outputs(
llm_engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(**VLLM_ENGINE_CONFIG))
python_vllm_output = []
for i in range(len(prompts)):
python_vllm_output.extend(
asyncio.run(
generate_python_vllm_output(
prompts[i], llm_engine, guided_generation=guided_generation
)
)
output = await generate_python_vllm_output(
prompts[i], llm_engine, guided_generation=guided_generation
)
if output:
python_vllm_output.extend(output)

with open(export_file, "wb") as f:
pickle.dump(python_vllm_output, f)

Expand Down Expand Up @@ -240,7 +239,7 @@ def tearDown(self):
)
FLAGS = parser.parse_args()
if FLAGS.generate_baseline:
prepare_vllm_baseline_outputs()
asyncio.run(prepare_vllm_baseline_outputs())
exit(0)

if FLAGS.generate_guided_baseline:
Expand All @@ -249,10 +248,12 @@ def tearDown(self):
"backend": "outlines",
}
guided_generation = GuidedDecodingParams(**guided_decoding_params)
prepare_vllm_baseline_outputs(
export_file="vllm_guided_baseline_output.pkl",
prompts=GUIDED_PROMPTS,
guided_generation=guided_generation,
asyncio.run(
prepare_vllm_baseline_outputs(
export_file="vllm_guided_baseline_output.pkl",
prompts=GUIDED_PROMPTS,
guided_generation=guided_generation,
)
)
exit(0)

Expand Down
12 changes: 12 additions & 0 deletions ci/L0_backend_vllm/accuracy_test/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,17 @@ RET=0
set +e
# Need to generate baseline first, since running 2 vLLM engines causes
# memory issues: https://github.com/vllm-project/vllm/issues/2248
# The two variables below are exported only for the baseline runs and are
# unset again before the server is started.
export VLLM_USE_V1=0
export VLLM_WORKER_MULTIPROC_METHOD=spawn
python3 $CLIENT_PY --generate-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
wait $BASELINE_PID

# Append (>>) rather than truncate (>) so the output of the first baseline
# run above is still in the log after the guided baseline run.
python3 $CLIENT_PY --generate-guided-baseline >> $VLLM_ENGINE_LOG 2>&1 & BASELINE_PID=$!
wait $BASELINE_PID

unset VLLM_USE_V1
unset VLLM_WORKER_MULTIPROC_METHOD

set -e

run_server
Expand Down Expand Up @@ -82,6 +87,13 @@ set -e

kill $SERVER_PID
wait $SERVER_PID

# Check that warning about V1 Engine appears in log - this warning is expected.
# -F matches the message as a fixed string (its '.' characters are literal, not
# regex wildcards); the log path is quoted so a path with spaces still works.
if ! grep -qF "Engine in background thread is experimental on VLLM_USE_V1=1. Falling back to V0 Engine." "$SERVER_LOG"; then
    echo -e "\n***\n*** ERROR: Expected warning about vLLM falling back to V0 Engine not found in logs.\n***"
    RET=1
fi

rm -rf models/

if [ $RET -eq 1 ]; then
Expand Down
Loading