55import os as _os
66import pathlib as _pl
77import subprocess as _sp
8+ import sys as _sys
89from typing import Generator , List , Optional , Sequence
910
11+ build_script_dir = _pl .Path (
12+ __file__ ).parent .resolve ().parent .parent .parent / "scripts"
13+ assert build_script_dir .is_dir ()
14+ _sys .path .append (str (build_script_dir ))
15+
1016from build_wheel import get_build_dir as get_trt_llm_build_dir
1117
1218default_test_parallel = 2
@@ -237,8 +243,7 @@ def produce_mpirun_command(*, global_commands, nranks, local_commands,
237243 return l [:- 1 ]
238244
239245
240- def run_multi_gpu_tests (build_dir : _pl .Path , timeout = 1500 ):
241-
246+ def run_simple_multi_gpu_tests (build_dir : _pl .Path , timeout = 1500 ):
242247 tests_dir = build_dir / "tests"
243248 cpp_env = {** _os .environ }
244249 # Utils tests
@@ -278,17 +283,27 @@ def run_multi_gpu_tests(build_dir: _pl.Path, timeout=1500):
278283 env = new_env ,
279284 timeout = 600 )
280285
281- xml_output_file = build_dir / "results-multi-gpu-real-decoder.xml"
282- trt_model_test = produce_mpirun_command (
283- global_commands = ["mpirun" , "--allow-run-as-root" ],
284- nranks = 4 ,
285- local_commands = [
286- "batch_manager/trtGptModelRealDecoderTest" ,
287- "--gtest_filter=*TP*:*PP*"
288- ],
289- leader_commands = [f"--gtest_output=xml:{ xml_output_file } " ])
290- run_command (trt_model_test , cwd = tests_dir , env = cpp_env ,
291- timeout = timeout ) # expecting ~ 1200s
286+ # UCX transceiver tests, the test may not be built if ENABLE_UCX is 0
287+ if _os .path .exists (
288+ _os .path .join (tests_dir , "batch_manager/ucxDataTransceiverTest" )):
289+ ucx_env = copy .copy (cpp_env )
290+ ucx_env ["UCX_MEMTYPE_CACHE" ] = "n"
291+ ucx_trans_test = [
292+ "mpirun" ,
293+ "-n" ,
294+ "2" ,
295+ "--allow-run-as-root" ,
296+ "batch_manager/ucxDataTransceiverTest" ,
297+ ]
298+ run_command (ucx_trans_test , cwd = tests_dir , env = ucx_env , timeout = 300 )
299+ else :
300+ _logger .info (
301+ "batch_manager/ucxDataTransceiverTest not found, so skipping." )
302+
303+
304+ def run_llama_executor_multi_gpu_tests (build_dir : _pl .Path , timeout = 1500 ):
305+ tests_dir = build_dir / "tests"
306+ cpp_env = {** _os .environ }
292307
293308 mgpu_env = copy .copy (cpp_env )
294309 mgpu_env ["RUN_LLAMA_MULTI_GPU" ] = "true"
@@ -316,19 +331,6 @@ def run_multi_gpu_tests(build_dir: _pl.Path, timeout=1500):
316331 ]
317332 run_command (trt_model_test , cwd = tests_dir , env = mgpu_env , timeout = 1500 )
318333
319- #EncDec test in leader mode
320- xml_output_file = build_dir / "results-multi-gpu-t5-exec-leader-mode.xml"
321- trt_model_test = produce_mpirun_command (
322- global_commands = ["mpirun" , "--allow-run-as-root" ],
323- nranks = 4 ,
324- local_commands = [
325- "executor/encDecTest" ,
326- "--gtest_filter=T5MultiGPUTest/EncDecParamsTest.Forward*"
327- ],
328- leader_commands = [f"--gtest_output=xml:{ xml_output_file } " ],
329- )
330- run_command (trt_model_test , cwd = tests_dir , env = cpp_env , timeout = 1500 )
331-
332334 #Logits processor and guided decoding test in leader mode
333335 xml_output_file = build_dir / "results-multi-gpu-logits-proc.xml"
334336 tp_pp_sizes = [(4 , 1 ), (2 , 2 ), (1 , 4 )]
@@ -350,24 +352,44 @@ def run_multi_gpu_tests(build_dir: _pl.Path, timeout=1500):
350352 leader_commands = [f"--gtest_output=xml:{ xml_output_file } " ])
351353 run_command (trt_model_test , cwd = tests_dir , env = mgpu_env , timeout = 1500 )
352354
353- # UCX transceiver tests, the test may not be built if ENABLE_UCX is 0
354- if _os .path .exists (
355- _os .path .join (tests_dir , "batch_manager/ucxDataTransceiverTest" )):
356- ucx_env = copy .copy (cpp_env )
357- ucx_env ["UCX_MEMTYPE_CACHE" ] = "n"
358- ucx_trans_test = [
359- "mpirun" ,
360- "-n" ,
361- "2" ,
362- "--allow-run-as-root" ,
363- "batch_manager/ucxDataTransceiverTest" ,
364- ]
365- run_command (ucx_trans_test , cwd = tests_dir , env = ucx_env , timeout = 300 )
366355
367- run_disagg_tests (build_dir )
def run_t5_multi_gpu_tests(build_dir: _pl.Path, timeout=1500):
    """Launch the T5 encoder-decoder executor gtest under mpirun (leader mode).

    The command line is assembled by ``produce_mpirun_command`` for 4 ranks
    and executed from ``build_dir / "tests"`` with a copy of the current
    process environment.

    Args:
        build_dir: Build directory. The test binary path is resolved relative
            to ``build_dir / "tests"``; the gtest XML report path requested
            via ``--gtest_output`` is placed directly under ``build_dir``.
        timeout: Forwarded to ``run_command`` as its ``timeout`` argument.
            Defaults to 1500, the value that used to be hard-coded.
    """
    tests_dir = build_dir / "tests"
    # Snapshot of the environment so the child gets its own mapping.
    cpp_env = {**_os.environ}

    # EncDec test in leader mode
    xml_output_file = build_dir / "results-multi-gpu-t5-exec-leader-mode.xml"
    trt_model_test = produce_mpirun_command(
        global_commands=["mpirun", "--allow-run-as-root"],
        nranks=4,
        local_commands=[
            "executor/encDecTest",
            "--gtest_filter=T5MultiGPUTest/EncDecParamsTest.Forward*"
        ],
        leader_commands=[f"--gtest_output=xml:{xml_output_file}"],
    )
    # Honour the caller-supplied timeout instead of ignoring the parameter.
    run_command(trt_model_test, cwd=tests_dir, env=cpp_env, timeout=timeout)
372+
373+
def run_trt_gpt_model_real_decoder_multi_gpu_tests(build_dir: _pl.Path,
                                                   timeout=1500):
    """Launch ``trtGptModelRealDecoderTest`` under mpirun for TP/PP cases.

    Only tests matching the gtest filter ``*TP*:*PP*`` are selected. The
    command is built by ``produce_mpirun_command`` for 4 ranks and run from
    ``build_dir / "tests"``.

    Args:
        build_dir: Build directory; the gtest XML report path requested via
            ``--gtest_output`` is placed directly under it.
        timeout: Forwarded to ``run_command`` as its ``timeout`` argument.
    """
    working_dir = build_dir / "tests"
    # Copy of the current environment handed to the child process.
    child_env = dict(_os.environ)

    report_path = build_dir / "results-multi-gpu-real-decoder.xml"
    launcher_args = ["mpirun", "--allow-run-as-root"]
    local_args = [
        "batch_manager/trtGptModelRealDecoderTest",
        "--gtest_filter=*TP*:*PP*",
    ]
    leader_args = [f"--gtest_output=xml:{report_path}"]

    mpirun_cmd = produce_mpirun_command(global_commands=launcher_args,
                                        nranks=4,
                                        local_commands=local_args,
                                        leader_commands=leader_args)

    # expecting ~ 1200s
    run_command(mpirun_cmd, cwd=working_dir, env=child_env, timeout=timeout)
369390
370- def run_disagg_tests (build_dir : _pl .Path ):
391+
392+ def run_disagg_multi_gpu_tests (build_dir : _pl .Path ):
371393
372394 tests_dir = build_dir / "tests"
373395 cpp_env = {** _os .environ }
@@ -549,38 +571,28 @@ def prepare_model_tests(model_name: str,
549571 timeout = 600 )
550572
551573
def prepare_multi_gpu_model_tests(test_list: List[str],
                                  python_exe: str,
                                  root_dir: _pl.Path,
                                  resources_dir: _pl.Path,
                                  model_cache: Optional[str] = None):
    """Call ``prepare_model_tests`` for each multi-GPU model in ``test_list``.

    Only the names ``"llama"`` and ``"t5"`` are recognised; any other entry
    in ``test_list`` is ignored. llama is handled before t5.

    Args:
        test_list: Names of the tests requested by the caller.
        python_exe: Forwarded unchanged to ``prepare_model_tests``.
        root_dir: Forwarded unchanged to ``prepare_model_tests``.
        resources_dir: Forwarded unchanged to ``prepare_model_tests``.
        model_cache: When truthy, passed on as ``["--model_cache", model_cache]``;
            otherwise an empty argument list is passed.
    """
    if model_cache:
        cache_args = ["--model_cache", model_cache]
    else:
        cache_args = []

    # (model name, extra multi-GPU arguments), in the order they are prepared.
    multi_gpu_models = (
        ("llama", ["--only_multi_gpu"]),
        ("t5", ['--tp', '4', '--pp', '1']),
    )

    for model, multi_gpu_args in multi_gpu_models:
        if model not in test_list:
            continue
        prepare_model_tests(model_name=model,
                            python_exe=python_exe,
                            root_dir=root_dir,
                            resources_dir=resources_dir,
                            model_cache_arg=cache_args,
                            only_multi_gpu_arg=multi_gpu_args)
584596
585597
586598def run_single_gpu_tests (build_dir : _pl .Path ,
0 commit comments