Skip to content

Commit 8a7cb87

Browse files
authored
[PD] turn on KV cache preemption.swap mode (#1925)
Must be used with HPU extension commit HabanaAI/vllm-hpu-extension@0bc6b42.
1 parent a6984fb commit 8a7cb87

File tree

3 files changed

+5
-1
lines changed

3 files changed

+5
-1
lines changed

pd_xpyd/1p_start_prefill.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ CMD=(
4545
# Check if XPYD_LOG is set
4646
if [ -n "$XPYD_LOG" ]; then
4747
timestamp=$(date +"%Y%m%d_%H%M%S")
48-
log_file="$XPYD_LOG/ProxyServer_${timestamp}.log"
48+
log_file="$XPYD_LOG/Prefill_${timestamp}.log"
4949
echo "Logging to $log_file..."
5050

5151
# Execute command and log stdout+stderr using tee

pd_xpyd/dp_d_env.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ env | grep VLLM_DECODE_BS
4545
env | grep VLLM_DECODE_BLOCK
4646
# *************************************** bucketing ends ************************************* #
4747

48+
SWAP_SPACE=64 # GB, memory per rank for preemption.swap.
49+
4850
# decode specific settings
4951
export VLLM_DP_SIZE=2
5052
export VLLM_USE_V1=0

pd_xpyd/dp_start_decode.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ do
6565
--distributed_executor_backend mp
6666
--enable-reasoning
6767
--reasoning-parser deepseek_r1
68+
--preemption-mode swap
69+
--swap-space "$SWAP_SPACE"
6870
$kv_cache_dtype_arg
6971
--kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_consumer"}'
7072
)

0 commit comments

Comments
 (0)