Skip to content

Commit 38e734e

Browse files
authored
[Feature] support hierarchical cache in v1 (#3939)
1 parent 051e4a8 commit 38e734e

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,15 @@ def schedule(self):
348348
if request.status == RequestStatus.WAITING:
349349
# Enable prefix caching
350350
if self.config.cache_config.enable_prefix_caching:
351+
if (
352+
self.config.cache_config.enable_hierarchical_cache
353+
and self.cache_manager.num_cpu_blocks > 0
354+
):
355+
if not self.cache_manager.can_allocate_gpu_blocks(
356+
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
357+
// self.config.cache_config.block_size
358+
): # stop scheduling when GPU blocks are insufficient: allocating blocks for hierarchical-cache matching in that state could cause a deadlock
359+
break
351360
success = self.get_prefix_cached_blocks(request)
352361
if not success:
353362
self._free_blocks(request)
@@ -387,6 +396,15 @@ def schedule(self):
387396
request.num_total_tokens
388397
) # A preempted task was already sent to the engine before being rescheduled and outputs no more tokens, so num_total_tokens should be static and correct here
389398
if self.config.cache_config.enable_prefix_caching:
399+
if (
400+
self.config.cache_config.enable_hierarchical_cache
401+
and self.cache_manager.num_cpu_blocks > 0
402+
):
403+
if not self.cache_manager.can_allocate_gpu_blocks(
404+
(request.need_prefill_tokens + self.config.cache_config.block_size - 1)
405+
// self.config.cache_config.block_size
406+
): # stop scheduling when GPU blocks are insufficient: allocating blocks for hierarchical-cache matching in that state could cause a deadlock
407+
break
390408
success = self.get_prefix_cached_blocks(request)
391409
if not success:
392410
self._free_blocks(request)

0 commit comments

Comments
 (0)