Skip to content

Commit adfb897

Browse files
Tabrizian authored and mikeiovine committed
[https://nvbugs/5601682][fix] Fix cacheTransceiver hang (NVIDIA#9311)
Signed-off-by: Iman Tabrizian <10105175+tabrizian@users.noreply.github.com>
Signed-off-by: Mike Iovine <6158008+mikeiovine@users.noreply.github.com>
1 parent 078d3a5 commit adfb897

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

cpp/tensorrt_llm/nanobind/batch_manager/cacheTransceiver.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -88,9 +88,11 @@ void tb::CacheTransceiverBindings::initBindings(nb::module_& m)
8888
.def("respond_and_send_async", &BaseCacheTransceiver::respondAndSendAsync)
8989
.def("request_and_receive_sync", &BaseCacheTransceiver::requestAndReceiveSync)
9090
.def("request_and_receive_async", &BaseCacheTransceiver::requestAndReceiveAsync)
91-
.def("check_context_transfer_status", &BaseCacheTransceiver::checkContextTransferStatus)
92-
.def("check_gen_transfer_status", &BaseCacheTransceiver::checkGenTransferStatus)
93-
.def("check_gen_transfer_complete", &BaseCacheTransceiver::checkGenTransferComplete)
91+
.def("check_context_transfer_status", &BaseCacheTransceiver::checkContextTransferStatus,
92+
nb::call_guard<nb::gil_scoped_release>())
93+
.def("check_gen_transfer_status", &BaseCacheTransceiver::checkGenTransferStatus,
94+
nb::call_guard<nb::gil_scoped_release>())
95+
.def("check_gen_transfer_complete", &BaseCacheTransceiver::checkGenTransferComplete)
9496
.def("cancel_request", &BaseCacheTransceiver::cancelRequest);
9597

9698
nb::enum_<executor::kv_cache::CacheState::AttentionType>(m, "AttentionType")

cpp/tensorrt_llm/pybind/batch_manager/cacheTransceiver.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -84,9 +84,11 @@ void tb::CacheTransceiverBindings::initBindings(py::module_& m)
8484
.def("respond_and_send_async", &BaseCacheTransceiver::respondAndSendAsync)
8585
.def("request_and_receive_sync", &BaseCacheTransceiver::requestAndReceiveSync)
8686
.def("request_and_receive_async", &BaseCacheTransceiver::requestAndReceiveAsync)
87-
.def("check_context_transfer_status", &BaseCacheTransceiver::checkContextTransferStatus)
88-
.def("check_gen_transfer_status", &BaseCacheTransceiver::checkGenTransferStatus)
89-
.def("check_gen_transfer_complete", &BaseCacheTransceiver::checkGenTransferComplete)
87+
.def("check_context_transfer_status", &BaseCacheTransceiver::checkContextTransferStatus,
88+
py::call_guard<py::gil_scoped_release>())
89+
.def("check_gen_transfer_status", &BaseCacheTransceiver::checkGenTransferStatus,
90+
py::call_guard<py::gil_scoped_release>())
91+
.def("check_gen_transfer_complete", &BaseCacheTransceiver::checkGenTransferComplete)
9092
.def("cancel_request", &BaseCacheTransceiver::cancelRequest);
9193

9294
py::enum_<executor::kv_cache::CacheState::AttentionType>(m, "AttentionType")

0 commit comments

Comments
 (0)