|
1 | 1 | #!/bin/bash |
2 | | - |
3 | 2 | BASH_DIR=$(dirname "${BASH_SOURCE[0]}") |
4 | 3 |
|
5 | | -BENCHMARK_MODE=0 |
6 | | - |
7 | | -if [ "$2" == "benchmark" ]; then |
8 | | - BENCHMARK_MODE=1 |
9 | | - sed -i 's/export VLLM_USE_ASYNC_TRANSFER_IN_PD=.*/export VLLM_USE_ASYNC_TRANSFER_IN_PD=0/' $BASH_DIR/pd_env.sh |
10 | | - echo " Benchmark mode enabled" |
11 | | -else |
12 | | - sed -i 's/export VLLM_USE_ASYNC_TRANSFER_IN_PD=.*/export VLLM_USE_ASYNC_TRANSFER_IN_PD=1/' $BASH_DIR/pd_env.sh |
13 | | - echo " Normal mode enabled" |
14 | | -fi |
15 | | - |
16 | | -if [ -z "$1" ] || [ "$1" == "g10" ] || [ "$1" == "pcie4" ]; then |
17 | | - if [ "$BENCHMARK_MODE" == "1" ]; then |
18 | | - source "$BASH_DIR"/start_etcd_mooncake_master.sh benchmark |
19 | | - echo "source "$BASH_DIR"/start_etcd_mooncake_master.sh benchmark" |
20 | | - else |
21 | | - source "$BASH_DIR"/start_etcd_mooncake_master.sh |
22 | | - echo "source "$BASH_DIR"/start_etcd_mooncake_master.sh" |
23 | | - fi |
| 4 | +# For backward compatibility, the following nodes are started as the mooncake master node. |
| 5 | +if [ "$2" == "master" ] || [ -z "$1" ] || [ "$1" == "g10" ] || [ "$1" == "pcie4" ]; then |
| 6 | + source "$BASH_DIR"/start_etcd_mooncake_master.sh |
| 7 | + echo "source "$BASH_DIR"/start_etcd_mooncake_master.sh" |
24 | 8 | fi |
25 | 9 |
|
26 | | - |
27 | | -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib |
28 | 10 | export MOONCAKE_CONFIG_PATH="$BASH_DIR"/mooncake_${1:-g10}.json |
29 | 11 |
|
30 | 12 | echo "Using Mooncake config: $MOONCAKE_CONFIG_PATH" |
31 | 13 |
|
32 | 14 | source "$BASH_DIR"/dp_p_env.sh |
33 | 15 |
|
34 | | -timestamp=$(date +"%Y%m%d_%H%M%S") |
35 | | -log_dir="xpyd_logs" |
36 | | -mkdir -p "$log_dir" |
37 | | -log_file="$log_dir/prefill_${timestamp}.log" |
38 | 16 |
|
39 | 17 | if [ "$INC_FP8" -eq 1 ]; then |
40 | 18 | kv_cache_dtype_arg="--kv-cache-dtype fp8_inc" |
|
44 | 22 | echo "<prefill>it's bf16 kv cache mode" |
45 | 23 | fi |
46 | 24 |
|
47 | | -python3 -m vllm.entrypoints.openai.api_server \ |
48 | | - --model "$model_path" \ |
49 | | - --port 8100 \ |
50 | | - --max-model-len "$model_len" \ |
51 | | - --gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION" \ |
52 | | - -tp 8 \ |
53 | | - --max-num-seqs "$max_num_seqs" \ |
54 | | - --trust-remote-code \ |
55 | | - --disable-async-output-proc \ |
56 | | - --disable-log-requests \ |
57 | | - --max-num-batched-tokens "$max_num_batched_tokens" \ |
58 | | - --use-padding-aware-scheduling \ |
59 | | - --use-v2-block-manager \ |
60 | | - --distributed_executor_backend mp \ |
61 | | - $kv_cache_dtype_arg \ |
62 | | - --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_producer"}' 2>&1 | tee "$log_file" |
| 25 | +# Define the Python command as an array |
| 26 | +CMD=( |
| 27 | + python3 -m vllm.entrypoints.openai.api_server |
| 28 | + --model "$model_path" |
| 29 | + --port 8100 |
| 30 | + --max-model-len "$model_len" |
| 31 | + --gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION" |
| 32 | + -tp 8 |
| 33 | + --max-num-seqs "$max_num_seqs" |
| 34 | + --trust-remote-code |
| 35 | + --disable-async-output-proc |
| 36 | + --disable-log-requests |
| 37 | + --max-num-batched-tokens "$max_num_batched_tokens" |
| 38 | + --use-padding-aware-scheduling |
| 39 | + --use-v2-block-manager |
| 40 | + --distributed_executor_backend mp |
| 41 | + $kv_cache_dtype_arg |
| 42 | + --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_producer"}' |
| 43 | +) |
| 44 | + |
| 45 | +# Log to a timestamped file under $XPYD_LOG only when XPYD_LOG is set and non-empty |
| 46 | +if [ -n "$XPYD_LOG" ]; then |
| 47 | + timestamp=$(date +"%Y%m%d_%H%M%S") |
| 48 | + log_file="$XPYD_LOG/ProxyServer_${timestamp}.log" |
| 49 | + echo "Logging to $log_file..." |
| 50 | + |
| 51 | + # Execute command and log stdout+stderr using tee |
| 52 | + "${CMD[@]}" 2>&1 | tee "$log_file" |
| 53 | +else |
| 54 | + echo "XPYD_LOG not set, running without logging..." |
| 55 | + # Execute command without logging |
| 56 | + "${CMD[@]}" |
| 57 | +fi |
0 commit comments