Skip to content

Commit b06d69b

Browse files
authored
fix: Response Cache Memory Leak (#449)
1 parent 70b908c commit b06d69b

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

src/dynamic_batch_scheduler.cc

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -673,17 +673,14 @@ void
673673
DynamicBatchScheduler::DelegateResponse(
674674
std::unique_ptr<InferenceRequest>& request)
675675
{
676-
std::lock_guard<std::mutex> lock(completion_queue_mtx_);
677-
completion_queue_.emplace_back();
678-
auto queue_slot = &completion_queue_.back();
679676
// Cache plumbing
680677
const std::string& key = request->CacheKey();
681678
const bool is_key_set = request->CacheKeyIsSet();
682679
const uint64_t lookup_end_ns = request->CacheLookupEndNs();
683680
const uint64_t lookup_start_ns = request->CacheLookupStartNs();
684681

685682
request->SetResponseDelegator(
686-
[this, queue_slot, key, is_key_set, lookup_end_ns, lookup_start_ns](
683+
[this, key, is_key_set, lookup_end_ns, lookup_start_ns](
687684
std::unique_ptr<InferenceResponse>&& response, const uint32_t flags) {
688685
if (response_cache_enabled_) {
689686
// Logical error, the key should be set if caching is enabled
@@ -734,6 +731,8 @@ DynamicBatchScheduler::DelegateResponse(
734731
if (preserve_ordering_) {
735732
{
736733
std::lock_guard<std::mutex> lock(completion_queue_mtx_);
734+
completion_queue_.emplace_back();
735+
auto queue_slot = &completion_queue_.back();
737736
queue_slot->emplace_back(std::move(response), flags);
738737
}
739738
FinalizeResponses();

0 commit comments

Comments
 (0)