Skip to content

Commit de0e50d

Browse files
committed
Remove pybind from CI/CD
Signed-off-by: Linda-Stadter <[email protected]>
1 parent 50890e4 commit de0e50d

File tree

6 files changed

+2
-46
lines changed

6 files changed

+2
-46
lines changed

.github/CODEOWNERS

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,6 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
185185
/tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
186186
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
187187
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
188-
/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
189-
/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
190188

191189
# The rule below requires that any PR modifying public APIs must be approved by at least one member
192190
# of the NVIDIA/trt-llm-committed-api-review-committee or NVIDIA/trt-llm-noncommitted-api-review-committee team.

docs/source/legacy/advanced/executor.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ Two C++ examples are provided that shows how to use the Executor API and can be
141141

142142
## Python Bindings for the Executor API
143143

144-
Python bindings for the Executor API are also available to use the Executor API from Python. The Python bindings are defined in [bindings.cpp](source:cpp/tensorrt_llm/pybind/executor/bindings.cpp) and once built, are available in package `tensorrt_llm.bindings.executor`. Running `'help('tensorrt_llm.bindings.executor')` in a Python interpreter will provide an overview of the classes available.
144+
Python bindings for the Executor API are also available to use the Executor API from Python. The Python bindings are defined in [bindings.cpp](source:cpp/tensorrt_llm/nanobind/executor/bindings.cpp) and once built, are available in package `tensorrt_llm.bindings.executor`. Running `help('tensorrt_llm.bindings.executor')` in a Python interpreter will provide an overview of the classes available.
145145

146146
In addition, three Python examples are provided to demonstrate how to use the Python bindings to the Executor API for single and multi-GPU models. They can be found in [`examples/bindings`](source:examples/bindings).
147147

jenkins/Build.groovy

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,6 @@ def CONFIG_LINUX_AARCH64 = "linux_aarch64"
4949
@Field
5050
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
5151

52-
@Field
53-
def CONFIG_LINUX_X86_64_PYBIND = "linux_x86_64_Pybind"
54-
55-
@Field
56-
def CONFIG_LINUX_AARCH64_PYBIND = "linux_aarch64_Pybind"
5752

5853
@Field
5954
def BUILD_CONFIGS = [
@@ -64,11 +59,6 @@ def BUILD_CONFIGS = [
6459
(TARNAME) : "TensorRT-LLM.tar.gz",
6560
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
6661
],
67-
(CONFIG_LINUX_X86_64_PYBIND) : [
68-
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake --micro_benchmarks",
69-
(TARNAME) : "pybind-TensorRT-LLM.tar.gz",
70-
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
71-
],
7262
(CONFIG_LINUX_X86_64_SINGLE_DEVICE) : [
7363
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=0 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars ENABLE_UCX=0 --micro_benchmarks",
7464
(TARNAME) : "single-device-TensorRT-LLM.tar.gz",
@@ -85,12 +75,6 @@ def BUILD_CONFIGS = [
8575
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
8676
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
8777
],
88-
(CONFIG_LINUX_AARCH64_PYBIND): [
89-
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake",
90-
(TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
91-
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
92-
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
93-
],
9478
(CONFIG_LINUX_AARCH64_LLVM) : [
9579
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
9680
(TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
@@ -549,8 +533,6 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
549533
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
550534
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
551535
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
552-
"Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
553-
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_PYBIND : CONFIG_LINUX_X86_64_PYBIND),
554536
]
555537

556538
if (cpu_arch == X86_64_TRIPLE) {

jenkins/L0_MergeRequest.groovy

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ def getMultiGpuFileChanged(pipeline, testFilter, globalVars)
694694
"cpp/tensorrt_llm/plugins/gptAttentionPlugin/gptAttentionPlugin.cpp",
695695
"cpp/tensorrt_llm/plugins/gptAttentionPlugin/gptAttentionPlugin.h",
696696
"cpp/tensorrt_llm/plugins/ncclPlugin/",
697-
"cpp/tensorrt_llm/pybind/",
697+
"cpp/tensorrt_llm/nanobind/",
698698
"cpp/tensorrt_llm/runtime/ipcUtils.cpp",
699699
"cpp/tensorrt_llm/runtime/ncclCommunicator.cpp",
700700
"cpp/tensorrt_llm/runtime/utils/mpiUtils.cpp",

jenkins/L0_Test.groovy

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,13 @@ def LLVM_CONFIG = "LLVM"
6565
@Field
6666
def LINUX_AARCH64_CONFIG = "linux_aarch64"
6767

68-
@Field
69-
def PYBIND_CONFIG = "Pybind"
70-
7168
@Field
7269
def BUILD_CONFIGS = [
7370
// Vanilla TARNAME is used for packaging in runLLMPackage
7471
(VANILLA_CONFIG) : [(TARNAME) : "TensorRT-LLM.tar.gz"],
7572
(SINGLE_DEVICE_CONFIG) : [(TARNAME) : "single-device-TensorRT-LLM.tar.gz"],
7673
(LLVM_CONFIG) : [(TARNAME) : "llvm-TensorRT-LLM.tar.gz"],
7774
(LINUX_AARCH64_CONFIG) : [(TARNAME) : "TensorRT-LLM-GH200.tar.gz"],
78-
(PYBIND_CONFIG) : [(TARNAME) : "pybind-TensorRT-LLM.tar.gz"],
7975
]
8076

8177
// TODO: Move common variables to an unified location
@@ -3147,7 +3143,6 @@ def launchTestJobs(pipeline, testFilter)
31473143
"A10-TensorRT-3": ["a10", "l0_a10", 3, 5],
31483144
"A10-TensorRT-4": ["a10", "l0_a10", 4, 5],
31493145
"A10-TensorRT-5": ["a10", "l0_a10", 5, 5],
3150-
"A10-Pybind": ["a10", "l0_a10_pybind", 1, 1],
31513146
"A30-Triton-1": ["a30", "l0_a30", 1, 1],
31523147
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
31533148
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
@@ -3241,9 +3236,6 @@ def launchTestJobs(pipeline, testFilter)
32413236
if (key.contains("llvm")) {
32423237
config = LLVM_CONFIG
32433238
}
3244-
if (key.contains("Pybind")) {
3245-
config = PYBIND_CONFIG
3246-
}
32473239
runLLMTestlistOnPlatform(pipeline, values[0], values[1], config, key.contains("-Perf-"), key, values[2], values[3])
32483240
}]]}
32493241
fullSet = parallelJobs.keySet()

tests/integration/test_lists/test-db/l0_a10.yml

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -275,19 +275,3 @@ l0_a10:
275275
backend: fmha
276276
tests:
277277
- test_fmha.py::test_fmha TIMEOUT (90)
278-
l0_a10_pybind:
279-
- condition:
280-
ranges:
281-
system_gpu_count:
282-
gte: 1
283-
lte: 1
284-
wildcards:
285-
gpu:
286-
- '*a10*'
287-
linux_distribution_name: ubuntu*
288-
terms:
289-
stage: pre_merge
290-
tests:
291-
- unittest/bindings
292-
- test_e2e.py::test_openai_chat_example[trt]
293-
- test_e2e.py::test_openai_chat_example[pytorch] TIMEOUT (90)

0 commit comments

Comments (0)