Skip to content

Commit 2d8a6db

Browse files
committed
[https://nvbugs/5689235][fix] Fix cancellation+chunked prefill+disagg
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent 4a1b742 commit 2d8a6db

File tree

12 files changed

+134
-64
lines changed

12 files changed

+134
-64
lines changed

cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ class WindowBlockManager
648648

649649
void replaceSharedBlock(GenerationRequest& sequence, SizeType32 blockIdx);
650650

651-
[[nodiscard]] std::optional<KVCacheBlock::IdType> storeBlocksForReuse(
651+
[[nodiscard]] std::vector<KVCacheBlock::IdType> storeBlocksForReuse(
652652
GenerationRequest& sequence, OptionalRef<LlmRequest const> llmRequest, bool pinBlocks = false);
653653

654654
void storeNewBlock(GenerationRequest& sequence, OptionalRef<LlmRequest const> llmRequest);
@@ -853,8 +853,8 @@ class WindowBlockManager
853853
//! \param blockKeys Key of each block.
854854
//! \param blockIds Id of each block.
855855
//! \param pinBlocks If true, increment ref count for blocks while storing (pin on store).
856-
//! \return Pair of (num blocks stored for reuse, id of the last block stored if any).
857-
[[nodiscard]] std::pair<SizeType32, std::optional<KVCacheBlock::IdType>> storeBlocks(
856+
//! \return Pair of (num blocks stored for reuse, ids of the blocks pinned while storing; empty when pinBlocks is false).
857+
[[nodiscard]] std::pair<SizeType32, std::vector<KVCacheBlock::IdType>> storeBlocks(
858858
std::vector<BlockKey> const& blockKeys, std::vector<KVCacheBlock::IdType> const& blockIds,
859859
bool pinBlocks = false);
860860

@@ -886,8 +886,8 @@ class WindowBlockManager
886886

887887
[[nodiscard]] std::shared_ptr<KVCacheBlock> findBlocksInReuseTreeByBlockKey(BlockKey const& blockKey);
888888

889-
//! \brief Unpin blocks by starting from a block id and walking prev pointers.
890-
void unpinBlocksById(KVCacheBlock::IdType blockId);
889+
//! \brief Unpin exactly the blocks with the given ids (out-of-range ids are skipped; prev pointers are not walked).
890+
void unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds);
891891

892892
void initializeSequenceStorageValidity(LlmRequest::RequestIdType requestId)
893893
{
@@ -1103,7 +1103,7 @@ class BlockManager
11031103
std::optional<KVCacheBlock::IdType> releaseBlocks(
11041104
GenerationRequest& sequence, OptionalRef<LlmRequest const> llmRequest = std::nullopt, bool pinBlocks = false);
11051105

1106-
[[nodiscard]] std::optional<KVCacheBlock::IdType> storeBlocksForReuse(
1106+
[[nodiscard]] std::vector<KVCacheBlock::IdType> storeBlocksForReuse(
11071107
GenerationRequest& sequence, OptionalRef<LlmRequest const> llmRequest = std::nullopt, bool pinBlocks = false);
11081108

11091109
void schedulingReleaseBlocks(LlmRequest::RequestIdType requestId);
@@ -1112,7 +1112,7 @@ class BlockManager
11121112
/// @param sequence The generation request whose blocks should be pinned.
11131113
void pinBlocks(GenerationRequest& sequence);
11141114

1115-
void unpinBlocksById(KVCacheBlock::IdType blockId);
1115+
void unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds);
11161116

11171117
void releaseLastBlock(GenerationRequest& sequence, SizeType32 windowSize);
11181118

@@ -1133,7 +1133,7 @@ class BlockManager
11331133
void offloadBlock(BlockPtr const& block, SizeType32 windowSize,
11341134
executor::KvCacheTransferMode mode = executor::KvCacheTransferMode::DRAM, std::string const& directory = "");
11351135

1136-
[[nodiscard]] std::pair<SizeType32, std::optional<KVCacheBlock::IdType>> storeBlocks(
1136+
[[nodiscard]] std::pair<SizeType32, std::vector<KVCacheBlock::IdType>> storeBlocks(
11371137
std::vector<BlockKey> const& blockKeys, std::vector<KVCacheBlock::IdType> const& blockIds,
11381138
SizeType32 windowSize, bool pinBlocks = false)
11391139
{
@@ -1584,7 +1584,7 @@ class BaseKVCacheManager
15841584
virtual void storeNewBlock(LlmRequest const& llmRequest) = 0;
15851585

15861586
/// \brief Store blocks for reuse for a given request id
1587-
[[nodiscard]] virtual std::optional<KVCacheBlock::IdType> storeBlocksForReuse(
1587+
[[nodiscard]] virtual std::vector<KVCacheBlock::IdType> storeBlocksForReuse(
15881588
LlmRequest::RequestIdType requestId, OptionalRef<LlmRequest const> llmRequest, bool pinBlocks = false)
15891589
= 0;
15901590

@@ -1678,7 +1678,7 @@ class BaseKVCacheManager
16781678
BlockKey const& blockKey, SizeType32 windowSize)
16791679
= 0;
16801680

1681-
virtual void unpinBlocksById(KVCacheBlock::IdType blockId) = 0;
1681+
virtual void unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds) = 0;
16821682
};
16831683

16841684
class KVCacheManager : public BaseKVCacheManager
@@ -1939,7 +1939,7 @@ class KVCacheManager : public BaseKVCacheManager
19391939
//! \brief Store newest blocks for reuse
19401940
void storeNewBlock(LlmRequest const& llmRequest) override;
19411941

1942-
[[nodiscard]] std::optional<KVCacheBlock::IdType> storeBlocksForReuse(
1942+
[[nodiscard]] std::vector<KVCacheBlock::IdType> storeBlocksForReuse(
19431943
LlmRequest::RequestIdType requestId, OptionalRef<LlmRequest const> llmRequest, bool pinBlocks = false) override;
19441944

19451945
[[nodiscard]] static SizeType32 getSinkBubbleLength(SizeType32 sinkTokenLen, SizeType32 tokensPerBlock);
@@ -1960,7 +1960,7 @@ class KVCacheManager : public BaseKVCacheManager
19601960

19611961
void pinBlocks(LlmRequest::RequestIdType requestId) override;
19621962

1963-
void unpinBlocksById(KVCacheBlock::IdType blockId) override;
1963+
void unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds) override;
19641964

19651965
std::optional<KVCacheBlock::IdType> getLastBlockId(LlmRequest::RequestIdType requestId) const override;
19661966

cpp/include/tensorrt_llm/batch_manager/llmRequest.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,6 +1667,12 @@ class GenericLlmRequest
16671667
[](auto reason) { return reason == executor::FinishReason::kLENGTH; });
16681668
}
16691669

1670+
[[nodiscard]] bool isFinishedDueToCancellation() const noexcept
1671+
{
1672+
return std::all_of(mFinishReasons.begin(), mFinishReasons.end(),
1673+
[](auto reason) { return reason == executor::FinishReason::kCANCELLED; });
1674+
}
1675+
16701676
[[nodiscard]] bool isTimedOut() const
16711677
{
16721678
if (!mAllottedTimeMs.has_value())

cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,7 +1556,7 @@ void WindowBlockManager::allocateBlock(GenerationRequest& sequence, bool shareAm
15561556
}
15571557
}
15581558

1559-
std::pair<SizeType32, std::optional<KVCacheBlock::IdType>> WindowBlockManager::storeBlocks(
1559+
std::pair<SizeType32, std::vector<KVCacheBlock::IdType>> WindowBlockManager::storeBlocks(
15601560
std::vector<BlockKey> const& blockKeys, std::vector<KVCacheBlock::IdType> const& blockIds, bool pinBlocks)
15611561
{
15621562
SizeType32 numBlocksStoredForReuse = 0;
@@ -1569,7 +1569,7 @@ std::pair<SizeType32, std::optional<KVCacheBlock::IdType>> WindowBlockManager::s
15691569

15701570
auto numBlocks = blockKeys.size();
15711571
std::vector<BlockPtr> storedBlocks;
1572-
std::optional<KVCacheBlock::IdType> lastStoredId = std::nullopt;
1572+
std::vector<KVCacheBlock::IdType> pinnedBlockIds;
15731573
for (std::size_t blockCnt = 0; blockCnt < numBlocks; ++blockCnt)
15741574
{
15751575
auto const bid = blockIds[blockCnt];
@@ -1620,14 +1620,14 @@ std::pair<SizeType32, std::optional<KVCacheBlock::IdType>> WindowBlockManager::s
16201620
if (pinBlocks)
16211621
{
16221622
searchRoot->incRefCount();
1623+
pinnedBlockIds.push_back(searchRoot->getBlockId());
16231624
}
1624-
lastStoredId = searchRoot->getBlockId();
16251625
}
16261626
if (mEventManager)
16271627
{
16281628
mEventManager->enqueueStoredEvent(storedBlocks, mWindowSize);
16291629
}
1630-
return {numBlocksStoredForReuse, lastStoredId};
1630+
return {numBlocksStoredForReuse, pinnedBlockIds};
16311631
}
16321632

16331633
void BlockManager::replaceSharedBlock(GenerationRequest& sequence, SizeType32 windowSize, SizeType32 blockIdx)
@@ -1715,15 +1715,15 @@ std::deque<tle::KVCacheEvent> BlockManager::getLatestEvents(std::optional<std::c
17151715
return mEventManager ? mEventManager->getEvents(timeout) : std::deque<tle::KVCacheEvent>{};
17161716
}
17171717

1718-
std::optional<KVCacheBlock::IdType> BlockManager::storeBlocksForReuse(
1718+
std::vector<KVCacheBlock::IdType> BlockManager::storeBlocksForReuse(
17191719
GenerationRequest& sequence, OptionalRef<LlmRequest const> llmRequest, bool pinBlocks)
17201720
{
1721-
std::optional<KVCacheBlock::IdType> lastStoredId = std::nullopt;
1721+
std::vector<KVCacheBlock::IdType> pinnedBlockIds;
17221722
for (auto& [_, manager] : mWindowBlockManagers)
17231723
{
1724-
lastStoredId = manager.storeBlocksForReuse(sequence, llmRequest, pinBlocks);
1724+
pinnedBlockIds = manager.storeBlocksForReuse(sequence, llmRequest, pinBlocks);
17251725
}
1726-
return lastStoredId;
1726+
return pinnedBlockIds;
17271727
}
17281728

17291729
std::optional<KVCacheBlock::IdType> BlockManager::releaseBlocks(
@@ -1767,15 +1767,15 @@ void BlockManager::pinBlocks(GenerationRequest& sequence)
17671767
}
17681768
}
17691769

1770-
void BlockManager::unpinBlocksById(KVCacheBlock::IdType blockId)
1770+
void BlockManager::unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds)
17711771
{
17721772
// Delegate to the block manager of the first window size only.
17731773
if (mWindowBlockManagers.empty())
17741774
{
17751775
return;
17761776
}
17771777
auto& firstManager = mWindowBlockManagers.begin()->second;
1778-
firstManager.unpinBlocksById(blockId);
1778+
firstManager.unpinBlocksById(blockIds);
17791779
}
17801780

17811781
void WindowBlockManager::pinBlocks(GenerationRequest& sequence)
@@ -1788,21 +1788,28 @@ void WindowBlockManager::pinBlocks(GenerationRequest& sequence)
17881788
}
17891789
}
17901790

1791-
void WindowBlockManager::unpinBlocksById(KVCacheBlock::IdType blockId)
1791+
void WindowBlockManager::unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds)
17921792
{
1793-
if (blockId < 0 || static_cast<size_t>(blockId) >= mAllBlocksById.size())
1793+
if (blockIds.empty())
17941794
{
17951795
return;
17961796
}
1797-
auto block = mAllBlocksById[blockId];
1798-
while (block && block->getBlockId() != KVCacheBlock::kCachedBlocksRootId)
1797+
1798+
for (auto const& blockId : blockIds)
17991799
{
1800-
block->decRefCount();
1801-
if (!block->hasRefs())
1800+
if (blockId < 0 || static_cast<size_t>(blockId) >= mAllBlocksById.size())
18021801
{
1803-
mEvictionPolicy->releaseBlock(block);
1802+
continue;
1803+
}
1804+
auto block = mAllBlocksById[blockId];
1805+
if (block && block->getBlockId() != KVCacheBlock::kCachedBlocksRootId)
1806+
{
1807+
block->decRefCount();
1808+
if (!block->hasRefs())
1809+
{
1810+
mEvictionPolicy->releaseBlock(block);
1811+
}
18041812
}
1805-
block = std::move(block->getPrevBlock());
18061813
}
18071814
}
18081815

@@ -1870,7 +1877,7 @@ void WindowBlockManager::storeNewBlock(GenerationRequest& sequence, OptionalRef<
18701877
(void) storeBlocks(std::move(blockKeys), cacheBlockIds[beamIdx]);
18711878
}
18721879

1873-
std::optional<KVCacheBlock::IdType> WindowBlockManager::storeBlocksForReuse(
1880+
std::vector<KVCacheBlock::IdType> WindowBlockManager::storeBlocksForReuse(
18741881
GenerationRequest& sequence, OptionalRef<LlmRequest const> llmRequest, bool pinBlocks)
18751882
{
18761883
auto constexpr beamIdx = 0;
@@ -1883,7 +1890,10 @@ std::optional<KVCacheBlock::IdType> WindowBlockManager::storeBlocksForReuse(
18831890
auto const usableSize = static_cast<runtime::SizeType32>(uniqueTokens.size()) - 1;
18841891
auto blockedUniqueTokens = chopVectorIntoBlocks<UniqueToken>(uniqueTokens, usableSize, mTokensPerBlock, true);
18851892
auto blockKeys = buildBlockKeys(blockedUniqueTokens, *llmRequest);
1886-
return storeBlocks(std::move(blockKeys), cacheBlockIds[beamIdx], pinBlocks).second;
1893+
1894+
auto [numStored, pinnedBlockIds] = storeBlocks(std::move(blockKeys), cacheBlockIds[beamIdx], pinBlocks);
1895+
1896+
return pinnedBlockIds;
18871897
}
18881898

18891899
std::optional<KVCacheBlock::IdType> WindowBlockManager::releaseBlocks(
@@ -1922,7 +1932,7 @@ std::optional<KVCacheBlock::IdType> WindowBlockManager::releaseBlocks(
19221932
std::transform(allocatedBlocks.begin(), allocatedBlocks.end(), cacheBlockIds.begin(),
19231933
[](BlockPtr const& block) { return block->getBlockId(); });
19241934

1925-
auto [numBlocksStoredForReuse, lastStoredId] = storeBlocks(std::move(blockKeys), cacheBlockIds);
1935+
auto [numBlocksStoredForReuse, pinnedBlockIds] = storeBlocks(std::move(blockKeys), cacheBlockIds);
19261936
TLLM_LOG_DEBUG("%s::releaseBlocks Request %lu, %d blocks stored for reuse", mLogPrefix.c_str(),
19271937
sequence.getRequestId(), numBlocksStoredForReuse);
19281938
}
@@ -2499,15 +2509,14 @@ std::optional<KVCacheBlock::IdType> KVCacheManager::removeSequence(
24992509
return lastStoredId;
25002510
}
25012511

2502-
std::optional<KVCacheBlock::IdType> KVCacheManager::storeBlocksForReuse(
2512+
std::vector<KVCacheBlock::IdType> KVCacheManager::storeBlocksForReuse(
25032513
RequestIdType requestId, OptionalRef<LlmRequest const> llmRequest, bool pinBlocks)
25042514
{
25052515
TLLM_LOG_TRACE("[%s]::%s start", isCrossKv() ? "CROSS" : "SELF", __PRETTY_FUNCTION__);
25062516
auto& sequence = getSequence(requestId);
2507-
std::optional<KVCacheBlock::IdType> lastStoredId
2508-
= mBlockManager.storeBlocksForReuse(sequence, llmRequest, pinBlocks);
2517+
auto pinnedBlockIds = mBlockManager.storeBlocksForReuse(sequence, llmRequest, pinBlocks);
25092518
TLLM_LOG_TRACE("[%s]::%s stop", isCrossKv() ? "CROSS" : "SELF", __PRETTY_FUNCTION__);
2510-
return lastStoredId;
2519+
return pinnedBlockIds;
25112520
}
25122521

25132522
void KVCacheManager::schedulingRemoveSequence(RequestIdType requestId)
@@ -2522,9 +2531,9 @@ void KVCacheManager::pinBlocks(RequestIdType requestId)
25222531
mBlockManager.pinBlocks(sequence);
25232532
}
25242533

2525-
void KVCacheManager::unpinBlocksById(KVCacheBlock::IdType blockId)
2534+
void KVCacheManager::unpinBlocksById(std::vector<KVCacheBlock::IdType> const& blockIds)
25262535
{
2527-
mBlockManager.unpinBlocksById(blockId);
2536+
mBlockManager.unpinBlocksById(blockIds);
25282537
}
25292538

25302539
SizeType32 KVCacheManager::copyBlockOffsets(ITensor& output, SizeType32 outputSlotOffset, RequestIdType requestId) const

cpp/tensorrt_llm/executor/executorImpl.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,11 +2179,11 @@ void Executor::Impl::terminateContextFinishedRequests(InTransList& inTransmissio
21792179
auto req = item.request;
21802180
if (req->isDisaggContextCompleteState())
21812181
{
2182-
// If lastBlockId was tracked, unpin it. Otherwise, just terminate.
2182+
// If pinnedBlockIds were tracked, unpin them. Otherwise, just terminate.
21832183
auto kvMgr = mModel->getKVCacheManager();
2184-
if (kvMgr && item.lastBlockId.has_value())
2184+
if (kvMgr && !item.pinnedBlockIds.empty())
21852185
{
2186-
kvMgr->unpinBlocksById(item.lastBlockId.value());
2186+
kvMgr->unpinBlocksById(item.pinnedBlockIds);
21872187
}
21882188
else
21892189
{
@@ -2234,14 +2234,14 @@ Executor::Impl::RequestList Executor::Impl::populateNewResponses(
22342234
// move the in transmission requests to another tracker
22352235
if (llmReq->isDisaggContextTransmissionState())
22362236
{
2237-
std::optional<SizeType32> lastBlockId{};
2237+
std::vector<SizeType32> pinnedBlockIds{};
22382238
auto kvMgr = mModel->getKVCacheManager();
22392239
if (kvMgr && kvMgr->isEnableBlockReuse() && !kvMgr->getBlockManager().isVariableWindow())
22402240
{
2241-
lastBlockId = kvMgr->storeBlocksForReuse(llmReq->mRequestId, llmReq, /*pinBlocks=*/true);
2241+
pinnedBlockIds = kvMgr->storeBlocksForReuse(llmReq->mRequestId, llmReq, /*pinBlocks=*/true);
22422242
mModel->terminateRequest(llmReq);
22432243
}
2244-
inTransmissionRequests.push_back(InTransmissionItem{*it, lastBlockId});
2244+
inTransmissionRequests.push_back(InTransmissionItem{*it, pinnedBlockIds});
22452245
}
22462246
finishedRequests.push_back(*it);
22472247
it = activeRequests.erase(it);

cpp/tensorrt_llm/executor/executorImpl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,12 @@ class Executor::Impl
8080
using RequestList = std::list<LlmRequestPtr>;
8181

8282
// When block reuse is enabled for context worker for disaggregated serving,
83-
// we need to store the last block id so that we can unpin the block when
83+
// we need to store the pinned block ids so that we can unpin them when
8484
// the request is finished.
8585
struct InTransmissionItem
8686
{
8787
LlmRequestPtr request;
88-
std::optional<SizeType32> lastBlockId;
88+
std::vector<SizeType32> pinnedBlockIds;
8989
};
9090

9191
using InTransList = std::list<InTransmissionItem>;

cpp/tensorrt_llm/nanobind/batch_manager/bindings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ void initBindings(nb::module_& m)
161161
.def("set_finished_reason", &GenLlmReq::setFinishedReason, nb::arg("finish_reason"), nb::arg("beam"))
162162
.def_prop_ro("is_finished", &GenLlmReq::isFinished)
163163
.def_prop_ro("is_finished_due_to_length", &GenLlmReq::isFinishedDueToLength)
164+
.def_prop_ro("is_finished_due_to_cancellation", &GenLlmReq::isFinishedDueToCancellation)
164165
.def_prop_rw(
165166
"context_current_position", &GenLlmReq::getContextCurrentPosition, &GenLlmReq::setContextCurrentPosition)
166167
.def_prop_ro("prepopulated_prompt_len", &GenLlmReq::getPrepopulatedPromptLen)

cpp/tensorrt_llm/nanobind/batch_manager/kvCacheManager.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ class PyKvCacheManager : public tbk::BaseKVCacheManager
123123
NB_OVERRIDE_PURE(removeSequence, requestId, llmRequest, pinOnRelease);
124124
}
125125

126-
std::optional<tbk::KVCacheBlock::IdType> storeBlocksForReuse(tb::LlmRequest::RequestIdType requestId,
126+
std::vector<tbk::KVCacheBlock::IdType> storeBlocksForReuse(tb::LlmRequest::RequestIdType requestId,
127127
tensorrt_llm::common::OptionalRef<tb::LlmRequest const> llmRequest, bool pinBlocks) override
128128
{
129129
NB_OVERRIDE_PURE(storeBlocksForReuse, requestId, llmRequest, pinBlocks);
@@ -363,7 +363,22 @@ void tb::kv_cache_manager::KVCacheManagerBindings::initBindings(nb::module_& m)
363363
nb::call_guard<nb::gil_scoped_release>())
364364
.def("add_token", &BaseKVCacheManager::addToken, nb::call_guard<nb::gil_scoped_release>())
365365
.def("add_sequence", &BaseKVCacheManager::addSequence, nb::call_guard<nb::gil_scoped_release>())
366-
.def("remove_sequence", &BaseKVCacheManager::removeSequence, nb::call_guard<nb::gil_scoped_release>())
366+
.def(
367+
"remove_sequence",
368+
[](tbk::BaseKVCacheManager& self, tb::LlmRequest::RequestIdType requestId, tb::LlmRequest const* llmRequest,
369+
bool pinOnRelease)
370+
{
371+
if (llmRequest != nullptr)
372+
{
373+
return self.removeSequence(requestId, *llmRequest, pinOnRelease);
374+
}
375+
else
376+
{
377+
return self.removeSequence(requestId, std::nullopt, pinOnRelease);
378+
}
379+
},
380+
nb::arg("request_id"), nb::arg("llm_request") = nullptr, nb::arg("pin_on_release") = false,
381+
nb::call_guard<nb::gil_scoped_release>())
367382
.def("pin_blocks", &BaseKVCacheManager::pinBlocks, nb::call_guard<nb::gil_scoped_release>())
368383
.def("scheduling_remove_sequence", &BaseKVCacheManager::schedulingRemoveSequence,
369384
nb::call_guard<nb::gil_scoped_release>())

cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ void initBindings(pybind11::module_& m)
165165
.def("set_finished_reason", &GenLlmReq::setFinishedReason, py::arg("finish_reason"), py::arg("beam"))
166166
.def_property_readonly("is_finished", &GenLlmReq::isFinished)
167167
.def_property_readonly("is_finished_due_to_length", &GenLlmReq::isFinishedDueToLength)
168+
.def_property_readonly("is_finished_due_to_cancellation", &GenLlmReq::isFinishedDueToCancellation)
168169
.def_property(
169170
"context_current_position", &GenLlmReq::getContextCurrentPosition, &GenLlmReq::setContextCurrentPosition)
170171
.def_property_readonly("prepopulated_prompt_len", &GenLlmReq::getPrepopulatedPromptLen)

0 commit comments

Comments
 (0)