diff --git a/.gitmodules b/.gitmodules
index 555349aa253..31970ad4054 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -20,3 +20,6 @@
 [submodule "3rdparty/xgrammar"]
 	path = 3rdparty/xgrammar
 	url = https://github.com/mlc-ai/xgrammar.git
+[submodule "3rdparty/nanobind"]
+	path = 3rdparty/nanobind
+	url = https://github.com/wjakob/nanobind
diff --git a/3rdparty/nanobind b/3rdparty/nanobind
new file mode 160000
index 00000000000..3d577d099a0
--- /dev/null
+++ b/3rdparty/nanobind
@@ -0,0 +1 @@
+Subproject commit 3d577d099a05f71a7860d8c6d80d2dd1fb92d9e1
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d82bebb73c4..1699d0dc640 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -170,6 +170,7 @@ get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
 set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
 add_subdirectory(${3RDPARTY_DIR}/pybind11 ${CMAKE_CURRENT_BINARY_DIR}/pybind11)
+add_subdirectory(${3RDPARTY_DIR}/nanobind ${CMAKE_CURRENT_BINARY_DIR}/nanobind)

 # include as system to suppress warnings
 include_directories(
@@ -181,7 +182,8 @@ include_directories(
   ${3RDPARTY_DIR}/cutlass/tools/util/include
   ${3RDPARTY_DIR}/NVTX/include
   ${3RDPARTY_DIR}/json/include
-  ${3RDPARTY_DIR}/pybind11/include)
+  ${3RDPARTY_DIR}/pybind11/include
+  ${3RDPARTY_DIR}/nanobind/include)

 if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL "11")
   add_definitions("-DENABLE_BF16")
diff --git a/cpp/tensorrt_llm/pybind/CMakeLists.txt b/cpp/tensorrt_llm/pybind/CMakeLists.txt
index f02599e6089..834d7235743 100755
--- a/cpp/tensorrt_llm/pybind/CMakeLists.txt
+++ b/cpp/tensorrt_llm/pybind/CMakeLists.txt
@@ -23,7 +23,7 @@ set(SRCS

 include_directories(${PROJECT_SOURCE_DIR}/include)

-pybind11_add_module(${TRTLLM_PYBIND_MODULE} ${SRCS})
+nanobind_add_module(${TRTLLM_PYBIND_MODULE} ${SRCS})

 set_property(TARGET ${TRTLLM_PYBIND_MODULE} PROPERTY POSITION_INDEPENDENT_CODE ON)

@@ -34,9 +34,8 @@ target_link_libraries(
   ${TRTLLM_PYBIND_MODULE}
   PUBLIC ${SHARED_TARGET} ${UNDEFINED_FLAG} ${NO_AS_NEEDED_FLAG}
          ${Python3_LIBRARIES} ${TORCH_LIBRARIES} torch_python)
-target_compile_definitions(
-  ${TRTLLM_PYBIND_MODULE} PUBLIC TRTLLM_PYBIND_MODULE=${TRTLLM_PYBIND_MODULE}
-  PYBIND11_DETAILED_ERROR_MESSAGES=1)
+target_compile_definitions(${TRTLLM_PYBIND_MODULE}
+                           PUBLIC TRTLLM_PYBIND_MODULE=${TRTLLM_PYBIND_MODULE})

 if(NOT WIN32)
   set_target_properties(
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp b/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp
index 4274bbe62dc..bc8d327a19e 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp
+++ b/cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp
@@ -30,14 +30,12 @@
 #include "tensorrt_llm/runtime/torchView.h"

 #include
-#include
-#include
-#include
-#include
+
+#include
+#include
 #include

-namespace py = pybind11;
+namespace py = nanobind;
 namespace tb = tensorrt_llm::batch_manager;
 namespace tle = tensorrt_llm::executor;
 namespace tr = tensorrt_llm::runtime;
@@ -47,7 +45,7 @@ using namespace tensorrt_llm::runtime;

 namespace tensorrt_llm::pybind::batch_manager
 {

-void initBindings(pybind11::module_& m)
+void initBindings(py::module_& m)
 {
     using GenLlmReq = tb::GenericLlmRequest;
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/bindings.h b/cpp/tensorrt_llm/pybind/batch_manager/bindings.h
index 4c36ea3f78c..d57694e72cc 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/bindings.h
+++ b/cpp/tensorrt_llm/pybind/batch_manager/bindings.h
@@ -18,11 +18,11 @@
 #pragma once

 #include "tensorrt_llm/pybind/common/customCasters.h"
-#include
+#include

 namespace tensorrt_llm::pybind::batch_manager
 {
-void initBindings(pybind11::module_& m);
+void initBindings(nanobind::module_& m);
 }
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp b/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp
index 721b12f6872..1f438ec027f 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp
+++ b/cpp/tensorrt_llm/pybind/batch_manager/buffers.cpp
@@ -23,13 +23,9 @@
 #include "tensorrt_llm/batch_manager/transformerBuffers.h"

 #include
-#include
-#include
-#include
-#include
 #include

-namespace py = pybind11;
+namespace py = nanobind;
 namespace tb = tensorrt_llm::batch_manager;
 namespace tr = tensorrt_llm::runtime;

@@ -38,7 +34,7 @@ using tr::SizeType32;

 namespace tensorrt_llm::pybind::batch_manager
 {

-void Buffers::initBindings(pybind11::module_& m)
+void Buffers::initBindings(py::module_& m)
 {
     py::class_<tb::TransformerBuffers>(m, "TransformerBuffers")
         .def(py::init const&, SizeType32, SizeType32,
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/buffers.h b/cpp/tensorrt_llm/pybind/batch_manager/buffers.h
index bfe06c0e8e8..29cba8fdfc5 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/buffers.h
+++ b/cpp/tensorrt_llm/pybind/batch_manager/buffers.h
@@ -18,13 +18,13 @@
 #pragma once

 #include "tensorrt_llm/pybind/common/customCasters.h"
-#include
+#include

 namespace tensorrt_llm::pybind::batch_manager
 {
 class Buffers
 {
 public:
-    static void initBindings(pybind11::module_& m);
+    static void initBindings(nanobind::module_& m);
 };
 } // namespace tensorrt_llm::pybind::batch_manager
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h b/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h
index 71221d8a7cb..49f65619e60 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h
+++ b/cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.h
@@ -18,13 +18,13 @@
 #pragma once

 #include "tensorrt_llm/pybind/common/customCasters.h"
-#include
+#include

 namespace tensorrt_llm::batch_manager
 {
 class CacheTransceiverBindings
 {
 public:
-    static void initBindings(pybind11::module_& m);
+    static void initBindings(nanobind::module_& m);
 };
 } // namespace tensorrt_llm::batch_manager
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp
index 5be47790c9a..ee7032851e1 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp
+++ b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.cpp
@@ -23,16 +23,12 @@
 #include "tensorrt_llm/runtime/torchView.h"

 #include
-#include
-#include
-#include
-#include
 #include

 namespace tb = tensorrt_llm::batch_manager;
 namespace tbk = tensorrt_llm::batch_manager::kv_cache_manager;
 namespace tr = tensorrt_llm::runtime;
-namespace py = pybind11;
+namespace py = nanobind;

 using BlockKey = tbk::BlockKey;
 using VecUniqueTokens = tensorrt_llm::runtime::VecUniqueTokens;
 using SizeType32 = tensorrt_llm::runtime::SizeType32;
diff --git a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h
index 67d8b13ca71..96c9235c85d 100644
--- a/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h
+++ b/cpp/tensorrt_llm/pybind/batch_manager/kvCacheManager.h
@@ -18,14 +18,14 @@
 #pragma once

 #include "tensorrt_llm/pybind/common/customCasters.h"
-#include
+#include

 namespace tensorrt_llm::batch_manager::kv_cache_manager
 {
 class KVCacheManagerBindings
 {
 public:
-    static void initBindings(pybind11::module_& m);
+    static void initBindings(nanobind::module_& m);
 };
 } // namespace tensorrt_llm::batch_manager::kv_cache_manager
@@ -34,6 +34,6 @@ namespace tensorrt_llm::batch_manager
 class BasePeftCacheManagerBindings
 {
 public:
-    static void initBindings(pybind11::module_& m);
+    static void initBindings(nanobind::module_& m);
 };
 } // namespace tensorrt_llm::batch_manager
diff --git a/cpp/tensorrt_llm/pybind/bindings.cpp b/cpp/tensorrt_llm/pybind/bindings.cpp
index ebda5773abb..178b7fcb957 100644
--- a/cpp/tensorrt_llm/pybind/bindings.cpp
+++ b/cpp/tensorrt_llm/pybind/bindings.cpp
@@ -15,11 +15,8 @@
  * limitations under the License.
  */

-#include
-#include
-#include
-#include
-#include
+#include
+#include

 #include
 #include

@@ -45,7 +42,7 @@
 #include "tensorrt_llm/runtime/samplingConfig.h"
 #include "tensorrt_llm/runtime/utils/mpiUtils.h"

-namespace py = pybind11;
+namespace py = nanobind;
 namespace tb = tensorrt_llm::batch_manager;
 namespace tbk = tensorrt_llm::batch_manager::kv_cache_manager;
 namespace tpb = tensorrt_llm::pybind::batch_manager;
@@ -69,7 +66,7 @@ tr::SamplingConfig makeSamplingConfig(std::vector const& con
 }
 } // namespace

-PYBIND11_MODULE(TRTLLM_PYBIND_MODULE, m)
+NB_MODULE(TRTLLM_PYBIND_MODULE, m)
 {
     m.doc() = "TensorRT-LLM Python bindings for C++ runtime";
diff --git a/cpp/tensorrt_llm/pybind/executor/bindings.cpp b/cpp/tensorrt_llm/pybind/executor/bindings.cpp
index 502ab705374..5403031716d 100644
--- a/cpp/tensorrt_llm/pybind/executor/bindings.cpp
+++ b/cpp/tensorrt_llm/pybind/executor/bindings.cpp
@@ -22,16 +22,12 @@
 #include "tensorrt_llm/executor/executor.h"
 #include "tensorrt_llm/executor/types.h"

-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include

 #include

-namespace py = pybind11;
+namespace py = nanobind;
 namespace tle = tensorrt_llm::executor;
 using SizeType32 = tle::SizeType32;

@@ -39,14 +35,14 @@ namespace tensorrt_llm::pybind::executor
 {

 template <typename T>
-void instantiateEventDiff(pybind11::module& m, std::string const& name)
+void instantiateEventDiff(py::module& m, std::string const& name)
 {
     py::class_<tle::KVCacheEventDiff<T>>(m, ("KVCacheEventDiff" + name).c_str())
         .def_readonly("old_value", &tle::KVCacheEventDiff<T>::oldValue)
         .def_readonly("new_value", &tle::KVCacheEventDiff<T>::newValue);
 }

-void initBindings(pybind11::module_& m)
+void initBindings(py::module_& m)
 {
     m.attr("__version__") = tle::version();
     py::enum_<tle::ModelType>(m, "ModelType")
diff --git a/cpp/tensorrt_llm/pybind/executor/bindings.h b/cpp/tensorrt_llm/pybind/executor/bindings.h
index ea9946d46d0..3ad76c17838 100644
--- a/cpp/tensorrt_llm/pybind/executor/bindings.h
+++ b/cpp/tensorrt_llm/pybind/executor/bindings.h
@@ -18,12 +18,12 @@
 #pragma once

 #include "tensorrt_llm/pybind/common/customCasters.h"
-#include
+#include

 namespace tensorrt_llm::pybind::executor
 {

 // Register bindings for executor API.
-void initBindings(pybind11::module_& m);
+void initBindings(nanobind::module_& m);

 } // namespace tensorrt_llm::pybind::executor
diff --git a/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp b/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp
index c8c32e5589b..50ce8a01317 100644
--- a/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp
+++ b/cpp/tensorrt_llm/pybind/userbuffers/bindings.cpp
@@ -19,13 +19,13 @@
 #include "tensorrt_llm/kernels/userbuffers/ub_interface.h"
 #include "tensorrt_llm/kernels/userbuffers/userbuffersManager.h"

-namespace py = pybind11;
+namespace py = nanobind;

 namespace tub = tensorrt_llm::runtime::ub;

 namespace tensorrt_llm::kernels::userbuffers
 {

-void UserBufferBindings::initBindings(pybind11::module_& m)
+void UserBufferBindings::initBindings(py::module_& m)
 {
     py::class_<tub::UBBuffer>(m, "UBBuffer")
         .def_readonly("size", &tub::UBBuffer::size)
diff --git a/cpp/tensorrt_llm/pybind/userbuffers/bindings.h b/cpp/tensorrt_llm/pybind/userbuffers/bindings.h
index 3a8fba2cc6f..e8913a5d846 100644
--- a/cpp/tensorrt_llm/pybind/userbuffers/bindings.h
+++ b/cpp/tensorrt_llm/pybind/userbuffers/bindings.h
@@ -18,13 +18,13 @@
 #pragma once

 #include "tensorrt_llm/pybind/common/customCasters.h"
-#include
+#include

 namespace tensorrt_llm::kernels::userbuffers
 {
 class UserBufferBindings
 {
 public:
-    static void initBindings(pybind11::module_& m);
+    static void initBindings(nanobind::module_& m);
 };
 } // namespace tensorrt_llm::kernels::userbuffers
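For orientation, the nanobind pattern these files are moving toward looks roughly like the sketch below. It is illustrative only and not part of the patch: the module, class, and member names are hypothetical, and it assumes the stock nanobind API, where the module entry point is `NB_MODULE` (replacing `PYBIND11_MODULE`) and struct fields are exposed with `def_ro`/`def_rw` rather than pybind11's `def_readonly`/`def_readwrite`.

```cpp
// Minimal nanobind sketch (hypothetical names; not part of this patch).
#include <nanobind/nanobind.h>
#include <nanobind/stl/string.h>

#include <string>

namespace nb = nanobind;

// Stand-in struct, playing the role of types such as tub::UBBuffer above.
struct ExampleBuffer
{
    std::string name;
    int size = 0;

    bool invalid() const
    {
        return size == 0;
    }
};

// NB_MODULE replaces PYBIND11_MODULE; "example_bindings" is a placeholder module name.
NB_MODULE(example_bindings, m)
{
    m.doc() = "Illustrative nanobind module";

    nb::class_<ExampleBuffer>(m, "ExampleBuffer")
        .def(nb::init<>())
        // nanobind spells these def_rw / def_ro (pybind11: def_readwrite / def_readonly).
        .def_rw("name", &ExampleBuffer::name)
        .def_ro("size", &ExampleBuffer::size)
        .def("invalid", &ExampleBuffer::invalid);
}
```

On the build side, nanobind ships its own CMake helper, `nanobind_add_module`, which is what the `cpp/tensorrt_llm/pybind/CMakeLists.txt` hunk above switches to in place of `pybind11_add_module`.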