88
99# Number of prefill and decode instances to create
1010NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:- 1} # Default to 1
11- NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:- 2} # Default to 2
11+ NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:- 1} # Default to 1
12+ PREFILLER_TP_SIZE=${PREFILLER_TP_SIZE:- 1}
13+ DECODER_TP_SIZE=${DECODER_TP_SIZE:- 1}
1214
1315# Find the git repository root directory
1416GIT_ROOT=$( git rev-parse --show-toplevel)
@@ -44,40 +46,6 @@ get_model_args() {
4446 echo " $extra_args "
4547}
4648
47- set_cli_args () {
48- PREFILLER_TP_SIZE=1
49- DECODER_TP_SIZE=1
50- # Iterate through the rest of the arguments
51- while [[ $# -gt 0 ]]; do
52- echo $#
53- case " $1 " in
54- --prefiller-tp-size)
55- if [[ -n " $2 " ]]; then
56- PREFILLER_TP_SIZE=" $2 "
57- shift 2 # Consume the flag and its value ($2)
58- else
59- echo " Error: --prefiller-tp-size requires a value." >&2
60- exit 1
61- fi
62- ;;
63- --decoder-tp-size)
64- if [[ -n " $2 " ]]; then
65- DECODER_TP_SIZE=" $2 "
66- shift 2
67- else
68- echo " Error: --decoder-tp-size requires a value." >&2
69- exit 1
70- fi
71- ;;
72- * )
73- # Handle any arguments not recognized
74- shift # Ignore unknown argument
75- ;;
76- esac
77- done
78- }
79-
80-
8149# Function to run tests for a specific model
8250run_tests_for_model () {
8351 local model_name=$1
@@ -100,10 +68,11 @@ run_tests_for_model() {
10068 # Calculate GPU ID - we'll distribute across available GPUs
10169 GPU_ID=$(( i % $(nvidia- smi -- query- gpu= name -- format= csv, noheader | wc - l)) )
10270
71+
10372 # Calculate port number (base port + instance number)
10473 PORT=$(( 8100 + i))
10574 # Calculate side channel port. Avoid clash with TP workers.
106- SIDE_CHANNEL_PORT=$(( 5559 + i * $PREFILLER_TP_SIZE ))
75+ SIDE_CHANNEL_PORT=$(( 5559 + i))
10776
10877 echo " Starting prefill instance $i on GPU $GPU_ID , port $PORT "
10978
@@ -122,7 +91,7 @@ run_tests_for_model() {
12291 FULL_CMD=" $BASE_CMD "
12392 fi
12493
125- eval " $FULL_CMD &"
94+ eval " $FULL_CMD 2>&1 > out_prefiller &"
12695
12796 # Store host and port for proxy configuration
12897 PREFILL_HOSTS+=(" localhost" )
@@ -137,7 +106,7 @@ run_tests_for_model() {
137106 # Calculate port number (base port + instance number)
138107 PORT=$(( 8200 + i))
139108 # Calculate side channel port
140- SIDE_CHANNEL_PORT=$(( 5659 + i * $PREFILLER_TP_SIZE ))
109+ SIDE_CHANNEL_PORT=$(( 5659 + i * $DECODER_TP_SIZE ))
141110
142111 echo " Starting decode instance $i on GPU $GPU_ID , port $PORT "
143112
@@ -156,7 +125,7 @@ run_tests_for_model() {
156125 FULL_CMD=" $BASE_CMD "
157126 fi
158127
159- eval " $FULL_CMD &"
128+ eval " $FULL_CMD 2>&1 > out_decoder &"
160129
161130 # Store host and port for proxy configuration
162131 DECODE_HOSTS+=(" localhost" )
0 commit comments