diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py
index 2012c3fef88b..1ce7a6944ed1 100644
--- a/vllm/v1/core/kv_cache_manager.py
+++ b/vllm/v1/core/kv_cache_manager.py
@@ -267,16 +267,6 @@ def allocate_slots(
         else:
             new_computed_block_list = self.empty_kv_cache_blocks.blocks
 
-        # Free the blocks that are skipped during the attention computation
-        # (e.g., tokens outside the sliding window).
-        # We can do this even if we cannot schedule this request due to
-        # insufficient free blocks.
-        # Should call this function before allocating new blocks to reduce
-        # the number of evicted blocks.
-        self.coordinator.remove_skipped_blocks(
-            request.request_id, request.num_computed_tokens
-        )
-
         # The number of computed tokens is the number of computed tokens plus
         # the new prefix caching hits
         num_computed_tokens = request.num_computed_tokens + num_new_computed_tokens
@@ -292,6 +282,32 @@ def allocate_slots(
             num_encoder_tokens=num_encoder_tokens,
         )
 
+        if (
+            num_blocks_to_allocate == 0
+            and new_computed_block_list is self.empty_kv_cache_blocks.blocks
+        ):
+            # Early return: no new blocks needed to be allocated
+            #
+            # NOTE: This optimization may delay block cleanup (remove_skipped_blocks)
+            # in rare edge cases, but the impact is negligible.
+            #
+            # Example: With sliding windows whose size is
+            # not divisible by block size, the first block
+            # may slide out of the window (becoming eligible for removal)
+            # even when no new blocks are allocated at the end. In the worst case,
+            # this delays removal of 1 block per request per single type manager.
+            return self.empty_kv_cache_blocks
+
+        # Free the blocks that are skipped during the attention computation
+        # (e.g., tokens outside the sliding window).
+        # We can do this even if we cannot schedule this request due to
+        # insufficient free blocks.
+        # Should call this function before allocating new blocks to reduce
+        # the number of evicted blocks.
+        self.coordinator.remove_skipped_blocks(
+            request.request_id, request.num_computed_tokens
+        )
+
         if num_blocks_to_allocate > self.block_pool.get_num_free_blocks():
             # Cannot allocate new blocks
             return None