File tree Expand file tree Collapse file tree 2 files changed +3
-3
lines changed
examples/backends/vllm/launch Expand file tree Collapse file tree 2 files changed +3
-3
lines changed Original file line number Diff line number Diff line change 55trap ' echo Cleaning up...; kill 0' EXIT
66
77# run ingress
8- python -m dynamo.frontend --router-mode kv --http-port=8000 &
8+ python -m dynamo.frontend --http-port=8000 &
99
1010# --enforce-eager is added for quick deployment. for production use, need to remove this flag
1111CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager &
Original file line number Diff line number Diff line change 44set -e
55trap ' echo Cleaning up...; kill 0' EXIT
66
7- # run ingress with KV router
8- python -m dynamo.frontend --router-mode kv --http-port=8000 &
7+ # run ingress
8+ python -m dynamo.frontend --http-port=8000 &
99
1010# run decode worker on GPU 0, without enabling KVBM
1111# NOTE: remove --enforce-eager for production use
You can’t perform that action at this time.
0 commit comments