@@ -150,6 +150,7 @@ def submit_job(config, log_dir):
150150 save_worker_config (config , gen_config_path , 'gen' )
151151
152152 # Prepare sbatch command
153+ # yapf: disable
153154 cmd = [
154155 'sbatch' ,
155156 f'--partition={ slurm_config ["partition" ]} ' ,
@@ -163,59 +164,60 @@ def submit_job(config, log_dir):
163164 * ([arg for arg in slurm_config ['extra_args' ].split () if arg ]),
164165 slurm_config ['script_file' ],
165166 # Hardware configuration
166- str (hw_config ['gpus_per_node' ]),
167- str (slurm_config ['numa_bind' ]).lower (),
168- str (ctx_nodes ), # Number of nodes needed for ctx workers
169- str (gen_nodes ), # Number of nodes needed for gen workers
170- str (ctx_world_size ), # World size for ctx workers
171- str (gen_world_size ), # World size for gen workers
167+ '--gpus-per-node' , str (hw_config ['gpus_per_node' ]),
168+ '--numa-bind' , str (slurm_config ['numa_bind' ]).lower (),
169+ '--ctx-nodes' , str (ctx_nodes ), # Number of nodes needed for ctx workers
170+ '--gen-nodes' , str (gen_nodes ), # Number of nodes needed for gen workers
171+ '--ctx-world-size' , str (ctx_world_size ), # World size for ctx workers
172+ '--gen-world-size' , str (gen_world_size ), # World size for gen workers
172173
173174 # Worker configuration
174- str (ctx_num ),
175- ctx_config_path ,
176- str (gen_num ),
177- gen_config_path ,
178- config ['benchmark' ]['concurrency_list' ],
175+ '--num-ctx-servers' , str (ctx_num ),
176+ '--ctx-config-path' , ctx_config_path ,
177+ '--num-gen-servers' , str (gen_num ),
178+ '--gen-config-path' , gen_config_path ,
179+ '--concurrency-list' , config ['benchmark' ]['concurrency_list' ],
179180
180181 # Sequence and benchmark parameters
181- str (config ['benchmark' ]['input_length' ]),
182- str (config ['benchmark' ]['output_length' ]),
183- str (config ['benchmark' ]['multi_round' ]),
184- str (config ['benchmark' ]['benchmark_ratio' ]),
185- str (config ['benchmark' ]['streaming' ]).lower (),
186- str (config ['benchmark' ]['use_nv_sa_benchmark' ]).lower (),
187- config ['benchmark' ]['mode' ],
188- str (config ['worker_config' ]['gen' ]['cache_transceiver_config' ]
182+ '--isl' , str (config ['benchmark' ]['input_length' ]),
183+ '--osl' , str (config ['benchmark' ]['output_length' ]),
184+ '--multi-round' , str (config ['benchmark' ]['multi_round' ]),
185+ '--benchmark-ratio' , str (config ['benchmark' ]['benchmark_ratio' ]),
186+ '--streaming' , str (config ['benchmark' ]['streaming' ]).lower (),
187+ '--use-nv-sa-benchmark' , str (config ['benchmark' ]['use_nv_sa_benchmark' ]).lower (),
188+ '--benchmark-mode' , config ['benchmark' ]['mode' ],
189+ '--cache-max-tokens' , str (config ['worker_config' ]['gen' ]['cache_transceiver_config' ]
189190 ['max_tokens_in_buffer' ]),
190191
191192 # Environment and paths
192- config ['benchmark' ]['dataset_file' ],
193- env_config ['model_path' ],
194- env_config ['trtllm_repo' ],
195- env_config ['work_dir' ],
196- log_dir , # Pass the generated log directory
197- env_config ['container_mount' ],
198- env_config ['container_image' ],
199- str (env_config ['build_wheel' ]).lower (),
200- env_config ['trtllm_wheel_path' ],
193+ '--dataset-file' , config ['benchmark' ]['dataset_file' ],
194+ '--model-path' , env_config ['model_path' ],
195+ '--trtllm-repo' , env_config ['trtllm_repo' ],
196+ '--work-dir' , env_config ['work_dir' ],
197+ '--full-logdir' , log_dir ,
198+ '--container-mount' , env_config ['container_mount' ],
199+ '--container-image' , env_config ['container_image' ],
200+ '--build-wheel' , str (env_config ['build_wheel' ]).lower (),
201+ '--trtllm-wheel-path' , env_config ['trtllm_wheel_path' ],
201202
202203 # Profiling
203- str (profiling_config ['nsys_on' ]).lower (),
204- profiling_config ['ctx_profile_range' ],
205- profiling_config ['gen_profile_range' ],
204+ '--nsys-on' , str (profiling_config ['nsys_on' ]).lower (),
205+ '--ctx-profile-range' , profiling_config ['ctx_profile_range' ],
206+ '--gen-profile-range' , profiling_config ['gen_profile_range' ],
206207
207208 # Accuracy evaluation
208- str (config ['accuracy' ]['enable_accuracy_test' ]).lower (),
209- config ['accuracy' ]['model' ],
210- config ['accuracy' ]['tasks' ],
211- config ['accuracy' ]['model_args_extra' ],
209+ '--enable-accuracy-test' , str (config ['accuracy' ]['enable_accuracy_test' ]).lower (),
210+ '--accuracy-model' , config ['accuracy' ]['model' ],
211+ '--accuracy-tasks' , config ['accuracy' ]['tasks' ],
212+ '--model-args-extra' , config ['accuracy' ]['model_args_extra' ],
212213
213214 # Worker environment variables
214- env_config ['worker_env_var' ],
215+ '--worker-env-var' , env_config ['worker_env_var' ],
215216
216217 # Server environment variables
217- env_config ['server_env_var' ]
218+ '--server-env-var' , env_config ['server_env_var' ]
218219 ]
220+ # yapf: enable
219221
220222 # Submit the job
221223 try :
0 commit comments