Skip to content

Commit e5892d5

Browse files
authored
[PD] Script updates (#1909)
1. Update for MoE chunk size. 2. Bug fixes. 3. Update for the xpyd_log path. 4. Some refactoring for easier reading and use. 5. Update the README.
1 parent c966c71 commit e5892d5

13 files changed

+191
-155
lines changed

pd_xpyd/1p_start_prefill.sh

Lines changed: 37 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,18 @@
11
#!/bin/bash
2-
32
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
43

5-
BENCHMARK_MODE=0
6-
7-
if [ "$2" == "benchmark" ]; then
8-
BENCHMARK_MODE=1
9-
sed -i 's/export VLLM_USE_ASYNC_TRANSFER_IN_PD=.*/export VLLM_USE_ASYNC_TRANSFER_IN_PD=0/' $BASH_DIR/pd_env.sh
10-
echo " Benchmark mode enabled"
11-
else
12-
sed -i 's/export VLLM_USE_ASYNC_TRANSFER_IN_PD=.*/export VLLM_USE_ASYNC_TRANSFER_IN_PD=1/' $BASH_DIR/pd_env.sh
13-
echo " Normal mode enabled"
14-
fi
15-
16-
if [ -z "$1" ] || [ "$1" == "g10" ] || [ "$1" == "pcie4" ]; then
17-
if [ "$BENCHMARK_MODE" == "1" ]; then
18-
source "$BASH_DIR"/start_etcd_mooncake_master.sh benchmark
19-
echo "source "$BASH_DIR"/start_etcd_mooncake_master.sh benchmark"
20-
else
21-
source "$BASH_DIR"/start_etcd_mooncake_master.sh
22-
echo "source "$BASH_DIR"/start_etcd_mooncake_master.sh"
23-
fi
4+
# For backward compatibility, the following nodes are started as the mooncake master node.
5+
if [ "$2" == "master" ] || [ -z "$1" ] || [ "$1" == "g10" ] || [ "$1" == "pcie4" ]; then
6+
source "$BASH_DIR"/start_etcd_mooncake_master.sh
7+
echo "source "$BASH_DIR"/start_etcd_mooncake_master.sh"
248
fi
259

26-
27-
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
2810
export MOONCAKE_CONFIG_PATH="$BASH_DIR"/mooncake_${1:-g10}.json
2911

3012
echo "Using Mooncake config: $MOONCAKE_CONFIG_PATH"
3113

3214
source "$BASH_DIR"/dp_p_env.sh
3315

34-
timestamp=$(date +"%Y%m%d_%H%M%S")
35-
log_dir="xpyd_logs"
36-
mkdir -p "$log_dir"
37-
log_file="$log_dir/prefill_${timestamp}.log"
3816

3917
if [ "$INC_FP8" -eq 1 ]; then
4018
kv_cache_dtype_arg="--kv-cache-dtype fp8_inc"
@@ -44,19 +22,36 @@ else
4422
echo "<prefill>it's bf16 kv cache mode"
4523
fi
4624

47-
python3 -m vllm.entrypoints.openai.api_server \
48-
--model "$model_path" \
49-
--port 8100 \
50-
--max-model-len "$model_len" \
51-
--gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION" \
52-
-tp 8 \
53-
--max-num-seqs "$max_num_seqs" \
54-
--trust-remote-code \
55-
--disable-async-output-proc \
56-
--disable-log-requests \
57-
--max-num-batched-tokens "$max_num_batched_tokens" \
58-
--use-padding-aware-scheduling \
59-
--use-v2-block-manager \
60-
--distributed_executor_backend mp \
61-
$kv_cache_dtype_arg \
62-
--kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_producer"}' 2>&1 | tee "$log_file"
25+
# Define the Python command as an array
26+
CMD=(
27+
python3 -m vllm.entrypoints.openai.api_server
28+
--model "$model_path"
29+
--port 8100
30+
--max-model-len "$model_len"
31+
--gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION"
32+
-tp 8
33+
--max-num-seqs "$max_num_seqs"
34+
--trust-remote-code
35+
--disable-async-output-proc
36+
--disable-log-requests
37+
--max-num-batched-tokens "$max_num_batched_tokens"
38+
--use-padding-aware-scheduling
39+
--use-v2-block-manager
40+
--distributed_executor_backend mp
41+
$kv_cache_dtype_arg
42+
--kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_producer"}'
43+
)
44+
45+
# Check if XPYD_LOG is set
46+
if [ -n "$XPYD_LOG" ]; then
47+
timestamp=$(date +"%Y%m%d_%H%M%S")
48+
log_file="$XPYD_LOG/ProxyServer_${timestamp}.log"
49+
echo "Logging to $log_file..."
50+
51+
# Execute command and log stdout+stderr using tee
52+
"${CMD[@]}" 2>&1 | tee "$log_file"
53+
else
54+
echo "XPYD_LOG not set, running without logging..."
55+
# Execute command without logging
56+
"${CMD[@]}"
57+
fi

pd_xpyd/2d_start_decode_head.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ unset VLLM_USE_V1
1010
unset VLLM_DP_MASTER_IP
1111
unset VLLM_DP_MASTER_PORT
1212

13+
ray stop --force
14+
15+
sleep 3s
16+
1317
ray start --head --port=8826
1418

1519
while true; do

pd_xpyd/2d_start_decode_node.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ unset VLLM_USE_V1
1010
unset VLLM_DP_MASTER_IP
1111
unset VLLM_DP_MASTER_PORT
1212

13+
ray stop --force
14+
15+
sleep 3s
16+
1317
ray start --address="${2:-10.239.129.81:8826}"
1418

1519

pd_xpyd/2p_start_prefill_head.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ export MOONCAKE_CONFIG_PATH="$BASH_DIR"/mooncake_${1:-g10}.json
1111

1212
source "$BASH_DIR"/dp_p_env.sh
1313

14-
ray start --head --port=6886
14+
ray stop --force
15+
16+
sleep 3s
1517

18+
ray start --head --port=6886
1619

1720
while true; do
1821
read -p "Continue? (y): " answer

pd_xpyd/2p_start_prefill_node.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,9 @@ export MOONCAKE_CONFIG_PATH="$BASH_DIR"/mooncake_${1:-g12}.json
66

77
source "$BASH_DIR"/dp_p_env.sh
88

9+
ray stop --force
10+
11+
sleep 3s
12+
913
ray start --address="${2:-10.239.129.9:6886}"
1014

pd_xpyd/dp_d_env.sh

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
33
source "$BASH_DIR"/pd_bucket.sh
44
source "$BASH_DIR"/pd_env.sh
55

6-
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
7-
86
export VLLM_GPU_MEMORY_UTILIZATION=0.7
97
export VLLM_GRAPH_RESERVED_MEM=0.3
108
export VLLM_GRAPH_PROMPT_RATIO=0
@@ -20,6 +18,7 @@ input_min=128
2018
input_max=16384
2119
output_max=16384
2220

21+
# *************************************** bucketing ******************************************* #
2322
unset VLLM_PROMPT_BS_BUCKET_MIN VLLM_PROMPT_BS_BUCKET_STEP VLLM_PROMPT_BS_BUCKET_MAX
2423
unset VLLM_PROMPT_SEQ_BUCKET_MIN VLLM_PROMPT_SEQ_BUCKET_STEP VLLM_PROMPT_SEQ_BUCKET_MAX
2524
unset VLLM_DECODE_BS_BUCKET_MIN VLLM_DECODE_BS_BUCKET_STEP VLLM_DECODE_BS_BUCKET_MAX
@@ -35,7 +34,7 @@ export VLLM_PROMPT_SEQ_BUCKET_STEP=128
3534
export VLLM_PROMPT_SEQ_BUCKET_MAX=1
3635

3736
#export VLLM_DECODE_BLOCK_BUCKET_MIN=2048
38-
export VLLM_DECODE_BS_BUCKET_STEP=2
37+
#export VLLM_DECODE_BS_BUCKET_STEP=2
3938
#export VLLM_DECODE_BLOCK_BUCKET_STEP=2
4039

4140
echo " environments are reseted "
@@ -44,19 +43,25 @@ env | grep VLLM_PROMPT_BS
4443
env | grep VLLM_PROMPT_SEQ
4544
env | grep VLLM_DECODE_BS
4645
env | grep VLLM_DECODE_BLOCK
46+
# *************************************** bucketing ends ************************************* #
4747

48-
export VLLM_SKIP_WARMUP=True
49-
#unset VLLM_SKIP_WARMUP
50-
#export PT_HPU_RECIPE_CACHE_CONFIG=/workspace/ww33_inc_fp8_d,false,131072
51-
48+
# decode specific settings
5249
export VLLM_DP_SIZE=2
5350
export VLLM_USE_V1=0
5451
export VLLM_DP_MASTER_IP=10.239.129.81
5552
export VLLM_DP_MASTER_PORT=25940
5653
export VLLM_EP_SIZE=16
5754

58-
export PT_HPU_MOE_THRESHOLD=64
55+
# warmup settings
56+
export VLLM_SKIP_WARMUP=True
57+
#export PT_HPU_RECIPE_CACHE_CONFIG=/workspace/pd_d_cache,false,131072
58+
59+
# MoE settings
60+
export VLLM_SUPPORT_MOE_CHUNK="true"
61+
export PT_HPU_MOE_CHUNK="64, 128"
62+
export PT_HPU_MOE_TOKEN_BOUNDARY="2048, 4096" # To be fine-tuned further.
5963

64+
# INC FP8 settings
6065
if [ "$INC_FP8" -eq 1 ]; then
6166
export QUANT_CONFIG="$BASH_DIR"/inc_fp8_tp1ep16.json
6267
fi

pd_xpyd/dp_p_env.sh

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
44
source "$BASH_DIR"/pd_bucket.sh
55
source "$BASH_DIR"/pd_env.sh
66

7-
export VLLM_EP_SIZE=8
87

98
export VLLM_GPU_MEMORY_UTILIZATION=0.7
109
export VLLM_GRAPH_RESERVED_MEM=0.1
@@ -17,6 +16,7 @@ input_min=128
1716
input_max=16384
1817
output_max=16384
1918

19+
# *************************************** bucketing ******************************************* #
2020
unset VLLM_PROMPT_BS_BUCKET_MIN VLLM_PROMPT_BS_BUCKET_STEP VLLM_PROMPT_BS_BUCKET_MAX
2121
unset VLLM_PROMPT_SEQ_BUCKET_MIN VLLM_PROMPT_SEQ_BUCKET_STEP VLLM_PROMPT_SEQ_BUCKET_MAX
2222
unset VLLM_DECODE_BS_BUCKET_MIN VLLM_DECODE_BS_BUCKET_STEP VLLM_DECODE_BS_BUCKET_MAX
@@ -37,18 +37,24 @@ env | grep VLLM_PROMPT_BS
3737
env | grep VLLM_PROMPT_SEQ
3838
env | grep VLLM_DECODE_BS
3939
env | grep VLLM_DECODE_BLOCK
40+
# *************************************** bucketing ends ************************************* #
4041

42+
# prefill specific settings
4143
export VLLM_SKIP_PREFILL_SAMPLING=1
42-
43-
export VLLM_SKIP_WARMUP=True
4444
export VLLM_DP_SIZE=1
4545
export VLLM_USE_V1=0
46+
export VLLM_EP_SIZE=8
4647

47-
#unset VLLM_SKIP_WARMUP
48-
#export PT_HPU_RECIPE_CACHE_CONFIG=/workspace/ww33_inc_fp8_p,false,131072
48+
# warmup settings
49+
export VLLM_SKIP_WARMUP=True
50+
#export PT_HPU_RECIPE_CACHE_CONFIG=/workspace/pd_p_cache,false,131072
51+
52+
# MoE settings
53+
export VLLM_SUPPORT_MOE_CHUNK="false" # Can be set to true after the following parameters are tuned.
54+
#export PT_HPU_MOE_CHUNK="64, 128"
55+
#export PT_HPU_MOE_TOKEN_BOUNDARY="2048, 4096"
4956

57+
# INC FP8 settings
5058
if [ "$INC_FP8" -eq 1 ]; then
5159
export QUANT_CONFIG="$BASH_DIR"/inc_fp8_tp8ep8.json
5260
fi
53-
54-
#python3 -m vllm.entrypoints.openai.api_server --model $model_path --port 8100 --max-model-len $model_len --gpu-memory-utilization $VLLM_GPU_MEMORY_UTILIZATION -tp 16 --max-num-seqs $max_num_seqs --trust-remote-code --disable-async-output-proc --kv-cache-dtype fp8_inc --disable-log-requests --max-num-batched-tokens $max_num_batched_tokens --use-padding-aware-scheduling --use-v2-block-manager --distributed_executor_backend ray --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_producer"}'

pd_xpyd/dp_start_decode.sh

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@
55
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
66
source "$BASH_DIR"/dp_d_env.sh
77

8-
timestamp=$(date +"%Y%m%d_%H%M%S")
9-
log_dir="xpyd_logs"
10-
mkdir -p "$log_dir"
11-
128
export MOONCAKE_CONFIG_PATH="$BASH_DIR"/mooncake_$1.json
139
echo "MOONCAKE_CONFIG_PATH=$MOONCAKE_CONFIG_PATH"
1410

@@ -72,19 +68,34 @@ do
7268
$kv_cache_dtype_arg
7369
--kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_consumer"}'
7470
)
75-
log_file="$log_dir/log_rank${RANK}_${timestamp}.log"
71+
# Only define log_file if XPYD_LOG is set
72+
if [ -n "$XPYD_LOG" ]; then
73+
timestamp=$(date +"%Y%m%d_%H%M%S")
74+
log_file="$XPYD_LOG/log_rank${RANK}_${timestamp}.log"
75+
fi
7676

7777
extra_env=()
7878
# if [ "$i" -eq 0 ] && [ "$RANK" -eq 0 ]; then
7979
# extra_env+=(VLLM_PROFILER_ENABLED=true)
8080
# fi
8181

82+
# Execute command
8283
if [ "$DP_RANK" -ne 1 ]; then
83-
echo "env VLLM_DP_RANK=$RANK ${CMD[*]}"
84-
env VLLM_DP_RANK_LOCAL="$i" VLLM_DP_RANK="$RANK" "${extra_env[@]}" "${CMD[@]}" 2>&1 | tee "$log_file" &
84+
if [ -n "$XPYD_LOG" ]; then
85+
echo "env VLLM_DP_RANK=$RANK ${CMD[*]} (logging to $log_file)"
86+
env VLLM_DP_RANK_LOCAL="$i" VLLM_DP_RANK="$RANK" "${extra_env[@]}" "${CMD[@]}" 2>&1 | tee "$log_file" &
87+
else
88+
echo "env VLLM_DP_RANK=$RANK ${CMD[*]} (no logging)"
89+
env VLLM_DP_RANK_LOCAL="$i" VLLM_DP_RANK="$RANK" "${extra_env[@]}" "${CMD[@]}" &
90+
fi
8591
else
86-
echo "${CMD[*]}"
87-
"${CMD[@]}" &
92+
if [ -n "$XPYD_LOG" ]; then
93+
echo "${CMD[*]} (logging to $log_file)"
94+
"${CMD[@]}" 2>&1 | tee "$log_file" &
95+
else
96+
echo "${CMD[*]} (no logging)"
97+
"${CMD[@]}" &
98+
fi
8899
fi
89100
done
90101

0 commit comments

Comments
 (0)