@@ -631,7 +631,7 @@ class WindowBlockManager
631631
632632 void replaceSharedBlock (GenerationRequest& sequence, SizeType32 blockIdx);
633633
634- [[nodiscard]] std::optional <KVCacheBlock::IdType> storeBlocksForReuse (
634+ [[nodiscard]] std::vector <KVCacheBlock::IdType> storeBlocksForReuse (
635635 GenerationRequest& sequence, OptionalRef<LlmRequest const > llmRequest, bool pinBlocks = false );
636636
637637 void storeNewBlock (GenerationRequest& sequence, OptionalRef<LlmRequest const > llmRequest);
@@ -836,8 +836,8 @@ class WindowBlockManager
836836 // ! \param blockKeys Key of each block.
837837 // ! \param blockIds Id of each block.
838838 // ! \param pinBlocks If true, increment ref count for blocks while storing (pin on store).
839- // ! \return Pair of (num blocks stored for reuse, id of the last block stored if any ).
840- [[nodiscard]] std::pair<SizeType32, std::optional <KVCacheBlock::IdType>> storeBlocks (
839+ // ! \return Pair of (number of blocks stored for reuse, ids of the blocks pinned while storing).
840+ [[nodiscard]] std::pair<SizeType32, std::vector <KVCacheBlock::IdType>> storeBlocks (
841841 std::vector<BlockKey> const & blockKeys, std::vector<KVCacheBlock::IdType> const & blockIds,
842842 bool pinBlocks = false );
843843
@@ -869,8 +869,8 @@ class WindowBlockManager
869869
870870 [[nodiscard]] std::shared_ptr<KVCacheBlock> findBlocksInReuseTreeByBlockKey (BlockKey const & blockKey);
871871
872- // ! \brief Unpin blocks by starting from a block id and walking prev pointers.
873- void unpinBlocksById (KVCacheBlock::IdType blockId );
872+ // ! \brief Unpin the blocks identified by the given block ids.
873+ void unpinBlocksById (std::vector< KVCacheBlock::IdType> const & blockIds );
874874
875875 void initializeSequenceStorageValidity (LlmRequest::RequestIdType requestId)
876876 {
@@ -1086,7 +1086,7 @@ class BlockManager
10861086 std::optional<KVCacheBlock::IdType> releaseBlocks (
10871087 GenerationRequest& sequence, OptionalRef<LlmRequest const > llmRequest = std::nullopt , bool pinBlocks = false );
10881088
1089- [[nodiscard]] std::optional <KVCacheBlock::IdType> storeBlocksForReuse (
1089+ [[nodiscard]] std::vector <KVCacheBlock::IdType> storeBlocksForReuse (
10901090 GenerationRequest& sequence, OptionalRef<LlmRequest const > llmRequest = std::nullopt , bool pinBlocks = false );
10911091
10921092 void schedulingReleaseBlocks (LlmRequest::RequestIdType requestId);
@@ -1095,7 +1095,7 @@ class BlockManager
10951095 // / @param sequence The generation request whose blocks should be pinned.
10961096 void pinBlocks (GenerationRequest& sequence);
10971097
1098- void unpinBlocksById (KVCacheBlock::IdType blockId );
1098+ void unpinBlocksById (std::vector< KVCacheBlock::IdType> const & blockIds );
10991099
11001100 void releaseLastBlock (GenerationRequest& sequence, SizeType32 windowSize);
11011101
@@ -1116,7 +1116,7 @@ class BlockManager
11161116 void offloadBlock (BlockPtr const & block, SizeType32 windowSize,
11171117 executor::KvCacheTransferMode mode = executor::KvCacheTransferMode::DRAM, std::string const & directory = " " );
11181118
1119- [[nodiscard]] std::pair<SizeType32, std::optional <KVCacheBlock::IdType>> storeBlocks (
1119+ [[nodiscard]] std::pair<SizeType32, std::vector <KVCacheBlock::IdType>> storeBlocks (
11201120 std::vector<BlockKey> const & blockKeys, std::vector<KVCacheBlock::IdType> const & blockIds,
11211121 SizeType32 windowSize, bool pinBlocks = false )
11221122 {
@@ -1567,7 +1567,7 @@ class BaseKVCacheManager
15671567 virtual void storeNewBlock (LlmRequest const & llmRequest) = 0;
15681568
15691569 // / \brief Store blocks for reuse for a given request id
1570- [[nodiscard]] virtual std::optional <KVCacheBlock::IdType> storeBlocksForReuse (
1570+ [[nodiscard]] virtual std::vector <KVCacheBlock::IdType> storeBlocksForReuse (
15711571 LlmRequest::RequestIdType requestId, OptionalRef<LlmRequest const > llmRequest, bool pinBlocks = false )
15721572 = 0;
15731573
@@ -1661,7 +1661,7 @@ class BaseKVCacheManager
16611661 BlockKey const & blockKey, SizeType32 windowSize)
16621662 = 0;
16631663
1664- virtual void unpinBlocksById (KVCacheBlock::IdType blockId ) = 0;
1664+ virtual void unpinBlocksById (std::vector< KVCacheBlock::IdType> const & blockIds ) = 0;
16651665};
16661666
16671667class KVCacheManager : public BaseKVCacheManager
@@ -1922,7 +1922,7 @@ class KVCacheManager : public BaseKVCacheManager
19221922 // ! \brief Store newest blocks for reuse
19231923 void storeNewBlock (LlmRequest const & llmRequest) override ;
19241924
1925- [[nodiscard]] std::optional <KVCacheBlock::IdType> storeBlocksForReuse (
1925+ [[nodiscard]] std::vector <KVCacheBlock::IdType> storeBlocksForReuse (
19261926 LlmRequest::RequestIdType requestId, OptionalRef<LlmRequest const > llmRequest, bool pinBlocks = false ) override ;
19271927
19281928 [[nodiscard]] static SizeType32 getSinkBubbleLength (SizeType32 sinkTokenLen, SizeType32 tokensPerBlock);
@@ -1943,7 +1943,7 @@ class KVCacheManager : public BaseKVCacheManager
19431943
19441944 void pinBlocks (LlmRequest::RequestIdType requestId) override ;
19451945
1946- void unpinBlocksById (KVCacheBlock::IdType blockId ) override ;
1946+ void unpinBlocksById (std::vector< KVCacheBlock::IdType> const & blockIds ) override ;
19471947
19481948 std::optional<KVCacheBlock::IdType> getLastBlockId (LlmRequest::RequestIdType requestId) const override ;
19491949
0 commit comments