Skip to content

Commit 83dbc6c

Browse files
authored
[TRTLLM-5532][feat] store the block of context request into kv cache (NVIDIA#6683)
Signed-off-by: bhsueh <[email protected]>
1 parent 9a8195e commit 83dbc6c

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2043,10 +2043,13 @@ void KVCacheManager::addSequence(
20432043
void KVCacheManager::storeContextBlocks(LlmRequest const& llmRequest)
20442044
{
20452045
auto const requestId = llmRequest.mRequestId;
2046-
auto& sequence = getSequence(requestId);
2047-
if (mEnableBlockReuse && !sequence.isCyclic() && !llmRequest.isDummyRequest())
2046+
if (mSequences.find(requestId) != mSequences.end())
20482047
{
2049-
mBlockManager.storeContextBlocks(sequence, llmRequest);
2048+
auto& sequence = getSequence(requestId);
2049+
if (mEnableBlockReuse && !sequence.isCyclic() && !llmRequest.isDummyRequest())
2050+
{
2051+
mBlockManager.storeContextBlocks(sequence, llmRequest);
2052+
}
20502053
}
20512054
}
20522055

tensorrt_llm/_torch/pyexecutor/resource_manager.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -492,6 +492,10 @@ def update_resources(self, scheduled_batch: ScheduledRequests):
492492
if request.py_rewind_len > 0:
493493
self.rewind_kv_cache(request, request.py_rewind_len)
494494

495+
# For context requests, we store the blocks for reuse.
496+
for request in scheduled_batch.context_requests:
497+
self.impl.store_context_blocks(request)
498+
495499
def free_resources(self, request: LlmRequest):
496500
self.impl.remove_sequence(request.py_request_id, request)
497501

0 commit comments

Comments
 (0)