diff --git a/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h b/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h
index affa83279b7..abab168bb14 100644
--- a/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h
+++ b/cpp/include/tensorrt_llm/batch_manager/evictionPolicy.h
@@ -91,11 +91,20 @@ class LRUEvictionPolicy : public BaseEvictionPolicy
 
     bool verifyQueueIntegrity() override;
 
+private:
+    //! \brief Add block to free block queue. Records all info needed to remove block from queue
+    void addToFreeBlockQueue(BlockPtr block, bool toFront);
+
+    //! \brief Remove block from free block queue, using info stored when block was added. It is always safe to call
+    //! this method \param block The block to be removed from free blocks queue. NOOP if block is not currently in queue
+    //! \return True if block was removed from free queue.
+    [[nodiscard]] bool removeFromFreeBlockQueue(BlockPtr block);
+
 private:
     // Queues of available leaf blocks, split by cache level and priority level
     std::vector<std::vector<FreeBlocksQueue>> mFreeQueues;
-    // Iterators to block entries in mFreeQueues
-    std::vector<std::optional<FreeBlocksQueue::iterator>> mFreeBlockIterators;
+    // Iterators to block entries in mFreeQueues. Holds ALL arguments needed to remove block from free queue
+    std::vector<std::optional<std::tuple<SizeType32, SizeType32, FreeBlocksQueue::iterator>>> mFreeBlockIterators;
     // Amount of free blocks at each cache level
     std::vector<SizeType32> mNumFreeBlocksPerLevel;
     // Secondary offload threshold. Blocks below this priority won't be evicted.
diff --git a/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h b/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h
index 64ff4c0d3fc..5b8f6f2a9e7 100644
--- a/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h
+++ b/cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h
@@ -337,9 +337,6 @@ class KVCacheBlock : public std::enable_shared_from_this<KVCacheBlock>
     // Distinct from getPrevBlock() (which navigates the radix lookup tree)
     BlockPtr mPrevBlockInSeq;
 
-    // Iterator pointing to this block in mFreeBlocks.
-    std::optional<FreeBlocksQueue::iterator> mFreeBlockIterator;
-
     // Flag indicating if block is full
     bool mIsFull;
 
diff --git a/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp b/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp
index 97108823f4d..6b843b6c346 100644
--- a/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp
+++ b/cpp/tensorrt_llm/batch_manager/evictionPolicy.cpp
@@ -65,7 +65,8 @@ void LRUEvictionPolicy::initialize(std::vector<BlockPtr>& mAllBlocksById, std::v
         for (SizeType32 blockId = 0; blockId < sizes[cacheLevel]; blockId++)
         {
             // Initialize all blocks to be the default priority level
-            mFreeBlockIterators.emplace_back(freeQueue.insert(freeQueue.end(), mAllBlocksById[startIdx + blockId]));
+            mFreeBlockIterators.emplace_back(std::make_tuple(
+                cacheLevel, defaultPriorityIdx, freeQueue.insert(freeQueue.end(), mAllBlocksById[startIdx + blockId])));
         }
 
         startIdx += sizes[cacheLevel];
@@ -122,6 +123,37 @@ std::tuple<BlockPtr, bool> LRUEvictionPolicy::getFreeBlock(SizeType32 cacheLevel
     TLLM_THROW("No free block found. This shouldn't happen!");
 }
 
+void LRUEvictionPolicy::addToFreeBlockQueue(BlockPtr block, bool toFront)
+{
+    // Info needed to add (and later remove) block to queue.
+    SizeType32 const cacheLevel = getCacheLevel(block);
+    SizeType32 const id = block->getBlockId();
+    SizeType32 const priority = getPriorityIdx(block->getPriority());
+    // Add block to queue along with all info required to later remove it
+    auto& q = mFreeQueues[cacheLevel][priority];
+    auto insertItr = toFront ? q.begin() : q.end();
+    mFreeBlockIterators[id] = std::make_tuple(cacheLevel, priority, q.insert(insertItr, block));
+    mNumFreeBlocksPerLevel[cacheLevel]++;
+}
+
+bool LRUEvictionPolicy::removeFromFreeBlockQueue(BlockPtr block)
+{
+    SizeType32 const id = block->getBlockId();
+    if (mFreeBlockIterators[id].has_value())
+    {
+        // Remove block using stored values, not current values
+        auto [cacheLevel, priority, it] = mFreeBlockIterators[id].value();
+        mFreeQueues[cacheLevel][priority].erase(it);
+        mNumFreeBlocksPerLevel[cacheLevel] -= 1;
+        mFreeBlockIterators[id] = std::nullopt;
+        return true;
+    }
+    else
+    {
+        return false;
+    }
+}
+
 void LRUEvictionPolicy::releaseBlock(BlockPtr block)
 {
     releaseBlock(block, false);
@@ -136,21 +168,8 @@ void LRUEvictionPolicy::releaseBlock(BlockPtr block, bool toFront)
         "Attempted to release the cached-blocks root into the eviction queue");
     // Placeholder blocks have no physical GPU memory and must never enter the eviction queue.
     TLLM_CHECK_WITH_INFO(!block->isPlaceholder(), "Attempted to release a placeholder block into the eviction queue");
-    SizeType32 const cacheLevel = getCacheLevel(block);
-    SizeType32 const id = block->getBlockId();
-
-    // If there are no children, this is a leaf block. Insert into a queue.
-    auto& q = mFreeQueues[cacheLevel][getPriorityIdx(block->getPriority())];
-    if (toFront)
-    {
-        mFreeBlockIterators[id] = q.insert(q.begin(), block);
-    }
-    else
-    {
-        mFreeBlockIterators[id] = q.insert(q.end(), block);
-    }
 
-    mNumFreeBlocksPerLevel[cacheLevel]++;
+    addToFreeBlockQueue(block, toFront);
 
     if (block->getDurationMs().has_value()
         && block->getPriority() != executor::KvCacheRetentionConfig::kDefaultRetentionPriority)
@@ -174,23 +193,16 @@ void LRUEvictionPolicy::claimBlock(BlockPtr block)
 void LRUEvictionPolicy::claimBlock(BlockPtr block, std::optional<executor::RetentionPriority> priority,
     std::optional<std::chrono::milliseconds> durationMs)
 {
-    SizeType32 const id = block->getBlockId();
-    SizeType32 const cacheLevel = getCacheLevel(block);
-
-    if (mFreeBlockIterators[id] != std::nullopt)
+    if (removeFromFreeBlockQueue(block))
     {
-        mFreeQueues[cacheLevel][getPriorityIdx(block->getPriority())].erase(*mFreeBlockIterators[id]);
-        mNumFreeBlocksPerLevel[cacheLevel] -= 1;
+        // Only need to remove block from expiring block heap if block was removed from free blocks queue
+        mExpiringBlockHeap.erase(block);
     }
 
-    mFreeBlockIterators[id] = std::nullopt;
-
     if (priority.has_value())
     {
         block->setPriority(*priority);
     }
-
-    mExpiringBlockHeap.erase(block);
     block->setDurationMs(durationMs);
 }
 
@@ -209,19 +221,15 @@ void LRUEvictionPolicy::refresh()
             break;
         }
 
-        auto const id = block->getBlockId();
-        auto const level = getCacheLevel(block);
-
         mExpiringBlockHeap.erase(mExpiringBlockHeap.begin());
 
-        if (mFreeBlockIterators[id] != std::nullopt)
+        // Add block to free blocks queue with default priority if it was removed from another priority free blocks
+        // queue
+        if (removeFromFreeBlockQueue(block))
         {
-            // This is already in another queue. Delete it, and bring it down to the default queue
-            mFreeQueues[level][getPriorityIdx(block->getPriority())].erase(*mFreeBlockIterators[id]);
-            auto& q = mFreeQueues[level][getPriorityIdx(kDefaultPriority)];
-            mFreeBlockIterators[id] = q.insert(q.end(), block);
+            block->setPriority(kDefaultPriority);
+            addToFreeBlockQueue(block, /*toFront*/ false);
         }
-        block->setPriority(kDefaultPriority);
     }
 }
 
diff --git a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp
index 107c703b0b7..df82ccc4ac3 100755
--- a/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp
+++ b/cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp
@@ -99,7 +99,6 @@ KVCacheBlock::KVCacheBlock(IdType blockId, tk::KVCacheIndex blockIdx)
     , mWindowSize{std::numeric_limits<int>::max()}
     // sentinel: unattached; valid sizes are >= 1 or kRecurrentStates (-1)
     , mIsPlaceholder{false}
-    , mFreeBlockIterator(std::nullopt)
     , mIsFull{false}
     , mPriority{executor::KvCacheRetentionConfig::kDefaultRetentionPriority}
     , mDurationMs{std::nullopt}
@@ -990,6 +989,13 @@ BlockPtr WindowBlockManager::getFreeBlock(GenerationRequest& sequence, executor:
     {
         // Offload block in primary memory before repurposing
         auto offloadBlock = std::get<0>(mEvictionPolicy->getFreeBlock(kSecondaryLevel));
+        // Claim both blocks BEFORE the swap so getCacheLevel() still reflects the
+        // actual free-queue each iterator belongs to.  After swapMemoryPoolBlockOffset()
+        // isPrimary() is inverted for both blocks, so calling claimBlock() post-swap
+        // would make it erase from the wrong std::list -- undefined behaviour.
+        // This ordering matches WindowBlockManager::offloadBlock().
+        mEvictionPolicy->claimBlock(block);        // block is PRIMARY  -> erases from primary queue
+        mEvictionPolicy->claimBlock(offloadBlock); // offloadBlock is SECONDARY -> erases from secondary queue
         mTransferManager->offload(block, offloadBlock, mPools, 0, mode, directory);
         // swap linear block offsets (i.e. make block the offload block)
         block->swapMemoryPoolBlockOffset(offloadBlock);
@@ -1000,18 +1006,19 @@ BlockPtr WindowBlockManager::getFreeBlock(GenerationRequest& sequence, executor:
                 tle::KVCacheUpdatedData(block->getHash()).cacheLevelUpdated(kPrimaryLevel, kSecondaryLevel),
                 mWindowSize);
         }
-        // Update the block as a secondary block (maintaining its priority)
-        mEvictionPolicy->claimBlock(block);
-        // Release the block into secondary block queue
+        // Release block (now SECONDARY after the swap) into the secondary queue
         mEvictionPolicy->releaseBlock(block);
         // We have the offloaded block as the block to use now.
         block = offloadBlock;
     }
+    else
+    {
+        // Claim the block in primary block queue
+        mEvictionPolicy->claimBlock(block, priority, durationMs);
+    }
 
     // Removes children of the block from the search tree
     freeChildren(block);
-    // Claim the block in primary block queue
-    mEvictionPolicy->claimBlock(block, priority, durationMs);
 
     // Deal with invalidating block save for reuse for the sequence
     if (mBlockToSequence.count(block->getBlockId()) > 0)
diff --git a/cpp/tests/unit_tests/batch_manager/evictionPolicyTest.cpp b/cpp/tests/unit_tests/batch_manager/evictionPolicyTest.cpp
index 96b6011eb56..bf5a6405299 100644
--- a/cpp/tests/unit_tests/batch_manager/evictionPolicyTest.cpp
+++ b/cpp/tests/unit_tests/batch_manager/evictionPolicyTest.cpp
@@ -223,6 +223,61 @@ TEST_F(LRUPolicyTest, TimedBlockTest)
     EXPECT_EQ(std::get<0>(policy->getFreeBlock(0)), block1);
 }
 
+// Regression test for PR #12297: claimBlock() used getCacheLevel() (which reads isPrimary())
+// to locate the right free queue. If swapMemoryPoolBlockOffset() ran first — flipping
+// isPrimary() on both blocks — claimBlock() would erase from the wrong std::list (UB).
+// The fix stores (cacheLevel, priorityIdx, iterator) at enqueue time so the removal path
+// is independent of the block's current isPrimary() value.
+TEST_F(LRUPolicyTest, ClaimAfterSwapDoesNotCorruptQueues)
+{
+    // getFreeBlock is a peek — it does not claim. Grab one block from each level.
+    auto [primaryBlock, primaryShouldOffload] = policy->getFreeBlock(0);
+    auto [secondaryBlock, secondaryShouldOffload] = policy->getFreeBlock(1);
+
+    ASSERT_NE(primaryBlock, nullptr);
+    ASSERT_NE(secondaryBlock, nullptr);
+    ASSERT_TRUE(primaryBlock->isPrimary());
+    ASSERT_FALSE(secondaryBlock->isPrimary());
+
+    // Remove them from their queues so we can re-insert with a clean baseline.
+    policy->claimBlock(primaryBlock);
+    policy->claimBlock(secondaryBlock);
+
+    // Re-insert: primaryBlock is enqueued into mFreeQueues[kPrimaryLevel=0],
+    //            secondaryBlock is enqueued into mFreeQueues[kSecondaryLevel=1].
+    // The stored tuple inside mFreeBlockIterators records (cacheLevel=0, ...) and
+    // (cacheLevel=1, ...) respectively at this point.
+    policy->releaseBlock(primaryBlock);
+    policy->releaseBlock(secondaryBlock);
+
+    ASSERT_EQ(policy->getNumFreeBlocks(0), NUM_PRIMARY_BLOCKS);
+    ASSERT_EQ(policy->getNumFreeBlocks(1), NUM_SECONDARY_BLOCKS);
+
+    // Simulate swapMemoryPoolBlockOffset: flip isPrimary() on both blocks.
+    // After this: primaryBlock->isPrimary() == false, secondaryBlock->isPrimary() == true.
+    // This is exactly what WindowBlockManager::getFreeBlock() did before calling claimBlock()
+    // in the pre-fix code — the ordering bug in PR #12297.
+    primaryBlock->swapMemoryPoolBlockOffset(secondaryBlock);
+
+    ASSERT_FALSE(primaryBlock->isPrimary());  // confirm the swap happened
+    ASSERT_TRUE(secondaryBlock->isPrimary()); // confirm the swap happened
+
+    // With old code (bare iterator + getCacheLevel()): getCacheLevel(primaryBlock) == 1 now,
+    // but primaryBlock's iterator lives in mFreeQueues[0] — erasing it from mFreeQueues[1]
+    // is undefined behavior and silently corrupts mNumFreeBlocksPerLevel counters.
+    // With the fix (stored tuple): claimBlock uses the recorded cacheLevel=0 and erases
+    // correctly from mFreeQueues[0], regardless of what isPrimary() currently returns.
+    policy->claimBlock(primaryBlock);
+    policy->claimBlock(secondaryBlock);
+
+    // Each block must have been removed from its ORIGINAL queue, not the post-swap one.
+    EXPECT_EQ(policy->getNumFreeBlocks(0), NUM_PRIMARY_BLOCKS - 1);
+    EXPECT_EQ(policy->getNumFreeBlocks(1), NUM_SECONDARY_BLOCKS - 1);
+
+    // No dangling iterators, double-erases, or corrupted list linkage.
+    EXPECT_TRUE(policy->verifyQueueIntegrity());
+}
+
 TEST_F(KvCacheRetentionConfigTest, InitializeTest)
 {
     // Invalid EOS