@@ -16,9 +16,6 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
1616
1717LLM_DOCKER_IMAGE = env. dockerImage
1818
19- LLM_DOCKER_IMAGE_12_9 = " urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
20- LLM_SBSA_DOCKER_IMAGE_12_9 = " urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
21-
2219// Always use x86_64 image for agent
2320AGENT_IMAGE = env. dockerImage. replace(" aarch64" , " x86_64" )
2421
@@ -40,9 +37,6 @@ def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
4037@Field
4138def CONFIG_LINUX_X86_64_VANILLA = " linux_x86_64_Vanilla"
4239
43- @Field
44- def CONFIG_LINUX_X86_64_VANILLA_CU12 = " linux_x86_64_Vanilla_CU12"
45-
4640@Field
4741def CONFIG_LINUX_X86_64_SINGLE_DEVICE = " linux_x86_64_SingleDevice"
4842
@@ -52,9 +46,6 @@ def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"
5246@Field
5347def CONFIG_LINUX_AARCH64 = " linux_aarch64"
5448
55- @Field
56- def CONFIG_LINUX_AARCH64_CU12 = " linux_aarch64_CU12"
57-
5849@Field
5950def CONFIG_LINUX_AARCH64_LLVM = " linux_aarch64_LLVM"
6051
@@ -73,11 +64,6 @@ def BUILD_CONFIGS = [
7364 (TARNAME ) : " TensorRT-LLM.tar.gz" ,
7465 (WHEEL_ARCHS ): " 80-real;86-real;89-real;90-real;100-real;103-real;120-real" ,
7566 ],
76- (CONFIG_LINUX_X86_64_VANILLA_CU12 ) : [
77- (WHEEL_EXTRA_ARGS ) : " --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks" ,
78- (TARNAME ) : " TensorRT-LLM-CU12.tar.gz" ,
79- (WHEEL_ARCHS ): " 80-real;86-real;89-real;90-real;100-real;103-real;120-real" ,
80- ],
8167 (CONFIG_LINUX_X86_64_PYBIND ) : [
8268 (WHEEL_EXTRA_ARGS ) : " --binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks" ,
8369 (TARNAME ) : " pybind-TensorRT-LLM.tar.gz" ,
@@ -99,12 +85,6 @@ def BUILD_CONFIGS = [
9985 (WHEEL_ARCHS ): " 90-real;100-real;103-real;120-real" ,
10086 (BUILD_JOBS_FOR_CONFIG ): " 6" ,
10187 ],
102- (CONFIG_LINUX_AARCH64_CU12 ): [
103- (WHEEL_EXTRA_ARGS ) : " --extra-cmake-vars WARNING_IS_ERROR=ON" ,
104- (TARNAME ) : " TensorRT-LLM-GH200-CU12.tar.gz" ,
105- (WHEEL_ARCHS ): " 90-real;100-real;103-real;120-real" ,
106- (BUILD_JOBS_FOR_CONFIG ): " 6" ,
107- ],
10888 (CONFIG_LINUX_AARCH64_PYBIND ): [
10989 (WHEEL_EXTRA_ARGS ) : " --binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl" ,
11090 (TARNAME ) : " pybind-TensorRT-LLM-GH200.tar.gz" ,
@@ -454,9 +434,6 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
454434 pipArgs = " "
455435 }
456436
457- if (tarName. contains(" _CU12" )) {
458- trtllm_utils. llmExecStepWithRetry(pipeline, script : " cd ${ LLM_ROOT} && sed -i '/^# .*<For CUDA 12\\ .9>\$ / {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt" )
459- }
460437 // install python package
461438 trtllm_utils. llmExecStepWithRetry(pipeline, script : " cd ${ LLM_ROOT} && pip3 install -r requirements-dev.txt ${ pipArgs} " )
462439
@@ -477,10 +454,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
477454 def llmPath = sh (script : " realpath ${ LLM_ROOT} " ,returnStdout : true ). trim()
478455 // TODO: Remove after the cmake version is upgraded to 3.31.8
479456 // Get triton tag from docker/dockerfile.multi
480- def tritonShortTag = " r25.08"
481- if (tarName. contains(" CU12" )) {
482- tritonShortTag = " r25.06"
483- }
457+ def tritonShortTag = " r25.09"
484458 sh " cd ${ LLM_ROOT} /triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${ llmPath} -DTRITON_COMMON_REPO_TAG=${ tritonShortTag} -DTRITON_CORE_REPO_TAG=${ tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${ tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${ tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${ buildJobs} install"
485459
486460 // Step 3: packaging wheels into tarfile
@@ -570,14 +544,9 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
570544 wheelDockerImage = env. dockerImage
571545 }
572546
573- def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9
574-
575547 buildConfigs = [
576548 " Build TRT-LLM" : [LLM_DOCKER_IMAGE ] + prepareLLMBuild(
577549 pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA ),
578- // Disable CUDA12 build for too slow to build (cost > 5 hours on SBSA)
579- " Build TRT-LLM CUDA12" : [LLM_DOCKER_IMAGE_CU12 ] + prepareLLMBuild(
580- pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12 ),
581550 " Build TRT-LLM LLVM" : [LLM_DOCKER_IMAGE ] + prepareLLMBuild(
582551 pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM ),
583552 " Build TRT-LLM Pybind" : [LLM_DOCKER_IMAGE ] + prepareLLMBuild(
0 commit comments