NVIDIA · Linda-Stadter · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026
@@ -185,8 +185,6 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
 /tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
 /cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
-/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
-/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
 
 # The rule below requires that any PR modifying public APIs must be approved by at least one member
 # of the NVIDIA/trt-llm-committed-api-review-committee or NVIDIA/trt-llm-noncommitted-api-review-committee team.

@@ -83,11 +83,6 @@ endif()
 add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
 add_compile_definitions("TLLM_ENABLE_CUDA")
 
-set(BINDING_TYPE
-    "nanobind"
-    CACHE STRING
-          "Binding type of Python bindings for C++ runtime and batch manager")
-
 set(INTERNAL_CUTLASS_KERNELS_PATH
     ""
     CACHE
@@ -246,16 +241,15 @@ get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
 set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
 add_subdirectory(${3RDPARTY_DIR} 3rdparty)
 
-if(BINDING_TYPE STREQUAL "pybind"
-   OR BUILD_DEEP_EP
-   OR BUILD_DEEP_GEMM)
+if(BUILD_DEEP_EP
+   OR BUILD_DEEP_GEMM
+   OR BUILD_FLASH_MLA)
   FetchContent_MakeAvailable(pybind11)
   include_directories(${CMAKE_BINARY_DIR}/_deps/pybind11-src/include)
 endif()
-if(BINDING_TYPE STREQUAL "nanobind")
-  FetchContent_MakeAvailable(nanobind)
-  include_directories(${CMAKE_BINARY_DIR}/_deps/nanobind-src/include)
-endif()
+
+FetchContent_MakeAvailable(nanobind) # also defines nanobind_SOURCE_DIR
+include_directories("${nanobind_SOURCE_DIR}/include") # honors FETCHCONTENT_* overrides
 
 FetchContent_MakeAvailable(cutlass cxxopts flashmla json xgrammar)
 

diff --git a/cpp/tensorrt_llm/CMakeLists.txt b/cpp/tensorrt_llm/CMakeLists.txt
@@ -293,13 +293,7 @@ if(BUILD_PYT)
   add_subdirectory(thop)
 endif()
 
-if(BINDING_TYPE STREQUAL "pybind")
-  add_subdirectory(pybind)
-endif()
-
-if(BINDING_TYPE STREQUAL "nanobind")
-  add_subdirectory(nanobind)
-endif()
+add_subdirectory(nanobind)
 
 if(BUILD_DEEP_EP)
   add_subdirectory(deep_ep)

diff --git a/cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/CMakeLists.txt b/cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/CMakeLists.txt
@@ -65,23 +65,10 @@ if(NIXL_ENABLED OR MOONCAKE_ENABLED)
 
   # Collect binding source files
   set(AGENT_BINDING_SOURCES "")
-  if(BINDING_TYPE STREQUAL "pybind")
-    list(APPEND AGENT_BINDING_SOURCES agentBindingsPybind.cpp)
-  else()
-    list(APPEND AGENT_BINDING_SOURCES agentBindingsNanobind.cpp)
-  endif()
+  list(APPEND AGENT_BINDING_SOURCES agentBindings.cpp)
 
-  if(BINDING_TYPE STREQUAL "pybind")
-    # Use pybind11 (already fetched via FetchContent)
-    pybind11_add_module(${TRANSFER_AGENT_BINDING_TARGET}
-                        ${AGENT_BINDING_SOURCES})
-    message(STATUS "Building tensorrt_llm_transfer_agent_binding with pybind11")
-  else()
-    # Default to nanobind (already fetched via FetchContent)
-    nanobind_add_module(${TRANSFER_AGENT_BINDING_TARGET}
-                        ${AGENT_BINDING_SOURCES})
-    message(STATUS "Building tensorrt_llm_transfer_agent_binding with nanobind")
-  endif()
+  nanobind_add_module(${TRANSFER_AGENT_BINDING_TARGET} ${AGENT_BINDING_SOURCES})
+  message(STATUS "Building tensorrt_llm_transfer_agent_binding with nanobind")
 
   target_compile_options(${TRANSFER_AGENT_BINDING_TARGET} PRIVATE -Wno-error)
 

diff --git a/cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/agentBindingsNanobind.cpp b/cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/agentBindings.cpp
diff --git a/cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/agentBindingsPybind.cpp b/cpp/tensorrt_llm/executor/cache_transmission/nixl_utils/agentBindingsPybind.cpp
diff --git a/cpp/tensorrt_llm/nanobind/bindings.cpp b/cpp/tensorrt_llm/nanobind/bindings.cpp
@@ -80,7 +80,6 @@ tr::SamplingConfig makeSamplingConfig(std::vector<tr::SamplingConfig> const& con
 NB_MODULE(TRTLLM_NB_MODULE, m)
 {
     m.doc() = "TensorRT LLM Python bindings for C++ runtime";
-    m.attr("binding_type") = "nanobind";
     nb::set_leak_warnings(false);
 
     // Create MpiComm binding first since it's used in the executor bindings