@@ -1639,6 +1639,7 @@ def launchTestListCheck(pipeline)
16391639 sh " tar -zxf ${ tarName} "
16401640 def llmPath = sh (script : " realpath ." , returnStdout : true ). trim()
16411641 def llmSrc = " ${ llmPath} /TensorRT-LLM/src"
1642+ trtllm_utils. llmExecStepWithRetry(pipeline, script : " pip3 install -r ${ llmSrc} /requirements-dev.txt" )
16421643 sh " NVIDIA_TRITON_SERVER_VERSION=25.10 LLM_ROOT=${ llmSrc} LLM_BACKEND_ROOT=${ llmSrc} /triton_backend python3 ${ llmSrc} /scripts/check_test_list.py --l0 --qa --waive"
16431644 } catch (InterruptedException e) {
16441645 throw e
@@ -2903,8 +2904,10 @@ def launchTestJobs(pipeline, testFilter)
29032904 " DGX_B200-4_GPUs-PyTorch-2" : [" b200-x4" , " l0_dgx_b200" , 2 , 2 , 4 ],
29042905 " DGX_B200-4_GPUs-PyTorch-Ray-1" : [" b200-x4" , " l0_dgx_b200" , 1 , 1 , 4 ],
29052906 " DGX_B200-8_GPUs-PyTorch-1" : [" b200-x8" , " l0_dgx_b200" , 1 , 1 , 8 ],
2906- " DGX_B200-4_GPUs-PyTorch-Post-Merge-1" : [" b200-trtllm" , " l0_dgx_b200" , 1 , 1 , 4 , 1 , true ],
2907- " DGX_B300-4_GPUs-PyTorch-Post-Merge-1" : [" b300-x4" , " l0_dgx_b300" , 1 , 1 , 4 ],
2907+ " DGX_B200-4_GPUs-PyTorch-Post-Merge-1" : [" b200-trtllm" , " l0_dgx_b200" , 1 , 2 , 4 , 1 , true ],
2908+ " DGX_B200-4_GPUs-PyTorch-Post-Merge-2" : [" b200-trtllm" , " l0_dgx_b200" , 2 , 2 , 4 , 1 , true ],
2909+ " DGX_B300-4_GPUs-PyTorch-Post-Merge-1" : [" b300-x4" , " l0_dgx_b300" , 1 , 2 , 4 ],
2910+ " DGX_B300-4_GPUs-PyTorch-Post-Merge-2" : [" b300-x4" , " l0_dgx_b300" , 2 , 2 , 4 ],
29082911 // Perf sanity post merge test
29092912 // Disable perf stages due to https://nvbugs/5643646
29102913 // "DGX_B200-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x4", "perf_sanity_l0_dgx_b200", 1, 1, 4],
@@ -2933,7 +2936,8 @@ def launchTestJobs(pipeline, testFilter)
29332936 fullSet + = SBSATestConfigs . keySet()
29342937
29352938 SBSASlurmTestConfigs = [
2936- " GB200-4_GPUs-PyTorch-1" : [" gb200-x4-oci" , " l0_gb200_multi_gpus" , 1 , 1 , 4 ],
2939+ " GB200-4_GPUs-PyTorch-1" : [" gb200-x4-oci" , " l0_gb200_multi_gpus" , 1 , 2 , 4 ],
2940+ " GB200-4_GPUs-PyTorch-2" : [" gb200-x4-oci" , " l0_gb200_multi_gpus" , 2 , 2 , 4 ],
29372941 " GB200-4_GPUs-PyTorch-Post-Merge-1" : [" gb200-x4-oci" , " l0_gb200_multi_gpus" , 1 , 1 , 4 ],
29382942 // Disable GB300 stages because the nodes will be temporarily offline.
29392943 // "GB300-PyTorch-1": ["gb300-single", "l0_gb300", 1, 1],
0 commit comments