Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,6 @@ docs/source/performance/perf-benchmarking.md @NVIDIA/trtllm-bench-reviewers
/tensorrt_llm/_torch/pyexecutor/resource_manager.py @NVIDIA/trt-llm-kv-cache-manager-devs
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
/cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs
/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h @NVIDIA/trt-llm-kv-cache-manager-devs
/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp @NVIDIA/trt-llm-kv-cache-manager-devs

# The rule below requires that any PR modifying public APIs must be approved by at least one member
# of the NVIDIA/trt-llm-committed-api-review-committee or NVIDIA/trt-llm-noncommitted-api-review-committee team.
Expand Down
18 changes: 6 additions & 12 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,6 @@ endif()
add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
add_compile_definitions("TLLM_ENABLE_CUDA")

set(BINDING_TYPE
"nanobind"
CACHE STRING
"Binding type of Python bindings for C++ runtime and batch manager")

set(INTERNAL_CUTLASS_KERNELS_PATH
""
CACHE
Expand Down Expand Up @@ -246,16 +241,15 @@ get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
add_subdirectory(${3RDPARTY_DIR} 3rdparty)

if(BINDING_TYPE STREQUAL "pybind"
OR BUILD_DEEP_EP
OR BUILD_DEEP_GEMM)
if(BUILD_DEEP_EP
OR BUILD_DEEP_GEMM
OR BUILD_FLASH_MLA)
FetchContent_MakeAvailable(pybind11)
include_directories(${CMAKE_BINARY_DIR}/_deps/pybind11-src/include)
endif()
if(BINDING_TYPE STREQUAL "nanobind")
FetchContent_MakeAvailable(nanobind)
include_directories(${CMAKE_BINARY_DIR}/_deps/nanobind-src/include)
endif()

FetchContent_MakeAvailable(nanobind)
include_directories(${CMAKE_BINARY_DIR}/_deps/nanobind-src/include)

FetchContent_MakeAvailable(cutlass cxxopts flashmla json xgrammar)

Expand Down
8 changes: 1 addition & 7 deletions cpp/tensorrt_llm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -293,13 +293,7 @@ if(BUILD_PYT)
add_subdirectory(thop)
endif()

if(BINDING_TYPE STREQUAL "pybind")
add_subdirectory(pybind)
endif()

if(BINDING_TYPE STREQUAL "nanobind")
add_subdirectory(nanobind)
endif()
add_subdirectory(nanobind)

if(BUILD_DEEP_EP)
add_subdirectory(deep_ep)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,23 +65,10 @@ if(NIXL_ENABLED OR MOONCAKE_ENABLED)

# Collect binding source files
set(AGENT_BINDING_SOURCES "")
if(BINDING_TYPE STREQUAL "pybind")
list(APPEND AGENT_BINDING_SOURCES agentBindingsPybind.cpp)
else()
list(APPEND AGENT_BINDING_SOURCES agentBindingsNanobind.cpp)
endif()
list(APPEND AGENT_BINDING_SOURCES agentBindings.cpp)

if(BINDING_TYPE STREQUAL "pybind")
# Use pybind11 (already fetched via FetchContent)
pybind11_add_module(${TRANSFER_AGENT_BINDING_TARGET}
${AGENT_BINDING_SOURCES})
message(STATUS "Building tensorrt_llm_transfer_agent_binding with pybind11")
else()
# Default to nanobind (already fetched via FetchContent)
nanobind_add_module(${TRANSFER_AGENT_BINDING_TARGET}
${AGENT_BINDING_SOURCES})
message(STATUS "Building tensorrt_llm_transfer_agent_binding with nanobind")
endif()
nanobind_add_module(${TRANSFER_AGENT_BINDING_TARGET} ${AGENT_BINDING_SOURCES})
message(STATUS "Building tensorrt_llm_transfer_agent_binding with nanobind")

target_compile_options(${TRANSFER_AGENT_BINDING_TARGET} PRIVATE -Wno-error)

Expand Down

This file was deleted.

1 change: 0 additions & 1 deletion cpp/tensorrt_llm/nanobind/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ tr::SamplingConfig makeSamplingConfig(std::vector<tr::SamplingConfig> const& con
NB_MODULE(TRTLLM_NB_MODULE, m)
{
m.doc() = "TensorRT LLM Python bindings for C++ runtime";
m.attr("binding_type") = "nanobind";
nb::set_leak_warnings(false);

// Create MpiComm binding first since it's used in the executor bindings
Expand Down
Loading