@@ -39,7 +39,7 @@ LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
3939LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env. wheelDockerImagePy312
4040
4141// DLFW torch image
42- DLFW_IMAGE = " urm.nvidia.com/docker/nvidia/pytorch:25.10 -py3"
42+ DLFW_IMAGE = " urm.nvidia.com/docker/nvidia/pytorch:25.12 -py3"
4343
4444// Ubuntu base image
4545UBUNTU_22_04_IMAGE = " urm.nvidia.com/docker/ubuntu:22.04"
@@ -316,6 +316,11 @@ def processShardTestList(llmSrc, testDBList, splitId, splits, perfMode=false) {
316316 foundRunningLine = true
317317 return false // Don't include the "Running" line itself
318318 }
319+ // Stop collecting when we hit the warnings/errors summary separator
320+ if (foundRunningLine && line. contains(' ======================' )) {
321+ foundRunningLine = false // Stop collecting
322+ return false
323+ }
319324
320325 def hasDoubleColon = line. contains(' ::' )
321326 def shouldInclude = foundRunningLine && hasDoubleColon
@@ -3389,7 +3394,7 @@ def launchTestJobs(pipeline, testFilter)
33893394 // Python version and OS for sanity check
33903395 x86SanityCheckConfigs = [
33913396 " PY312-DLFW" : [
3392- LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE ,
3397+ LLM_DOCKER_IMAGE , // Workaround ABI incompatibilities between PyTorch 2.9.1 and 2.10.0a0
33933398 " B200_PCIe" ,
33943399 X86_64_TRIPLE ,
33953400 false ,
@@ -3418,15 +3423,16 @@ def launchTestJobs(pipeline, testFilter)
34183423 ]
34193424
34203425 aarch64SanityCheckConfigs = [
3426+ /* // Temporarily disable PY312-UB2404 because no official PyTorch build is available for CUDA 13.1.
34213427 "PY312-UB2404": [
34223428 LLM_DOCKER_IMAGE,
34233429 "GH200",
34243430 AARCH64_TRIPLE,
34253431 false,
34263432 "",
3427- UBUNTU_24_04_IMAGE ,
3428- true , // Extra PyTorch CUDA 13.0 install
3429- ],
3433+ DLFW_IMAGE ,
3434+ false , // Extra PyTorch CUDA 13.0 install
3435+ ],*/
34303436 " PY312-DLFW" : [
34313437 LLM_DOCKER_IMAGE ,
34323438 " GH200" ,
@@ -3524,17 +3530,17 @@ def launchTestJobs(pipeline, testFilter)
35243530 def platform = cpu_arch == X86_64_TRIPLE ? " x86_64" : " sbsa"
35253531 trtllm_utils. llmExecStepWithRetry(pipeline, script : " wget https://developer.download.nvidia.com/compute/cuda/repos/${ ubuntu_version} /${ platform} /cuda-keyring_1.1-1_all.deb" )
35263532 trtllm_utils. llmExecStepWithRetry(pipeline, script : " dpkg -i cuda-keyring_1.1-1_all.deb" )
3527- trtllm_utils. llmExecStepWithRetry(pipeline, script : " apt-get update && apt-get install -y cuda-toolkit-13-0 " )
3533+ trtllm_utils. llmExecStepWithRetry(pipeline, script : " apt-get update && apt-get install -y cuda-toolkit-13-1 " )
35283534 }
35293535 // Extra PyTorch CUDA 13.0 install for all bare-metal environments (Default PyTorch is for CUDA 12.8)
35303536 if (values[6 ]) {
35313537 echo " ###### Extra PyTorch CUDA 13.0 install Start ######"
35323538 // Use internal mirror instead of https://download.pytorch.org/whl/cu130 for better network stability.
35333539 // PyTorch CUDA 13.0 package and torchvision package can be installed as expected.
35343540 if (k8s_arch == " amd64" ) {
3535- trtllm_utils. llmExecStepWithRetry(pipeline, script : " pip3 install torch==2.9.0 +cu130 torchvision==0.24.0 +cu130 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple" )
3541+ trtllm_utils. llmExecStepWithRetry(pipeline, script : " pip3 install torch==2.9.1 +cu130 torchvision==0.24.1 +cu130 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple" )
35363542 } else {
3537- trtllm_utils. llmExecStepWithRetry(pipeline, script : " pip3 install torch==2.9.0 +cu130 torchvision==0.24.0 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple" )
3543+ trtllm_utils. llmExecStepWithRetry(pipeline, script : " pip3 install torch==2.9.1 +cu130 torchvision==0.24.1 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple" )
35383544 }
35393545 }
35403546
0 commit comments