|
31 | 31 | from lightllm.server.router.shm_reqs_io_buffer import ShmReqsIOBuffer |
32 | 32 | from lightllm.server.router.model_infer.mode_backend.overlap_events import OverlapEventManager, OverlapEventPack |
33 | 33 | from lightllm.models.deepseek_mtp.model import Deepseek3MTPModel |
34 | | -from .multi_level_cache_manager import MultiLevelCacheManager |
| 34 | +from .multi_level_cache import MultiLevelCacheModule |
35 | 35 |
|
36 | 36 |
|
37 | 37 | class ModeBackend: |
@@ -201,7 +201,7 @@ def init_model(self, kvargs): |
201 | 201 | self.infer_loop_thread1.start() |
202 | 202 |
|
203 | 203 | if self.args.enable_cpu_cache: |
204 | | - self.multi_level_cache_manager = MultiLevelCacheManager(self) |
| 204 | + self.multi_level_cache_module = MultiLevelCacheModule(self) |
205 | 205 | return |
206 | 206 |
|
207 | 207 | def init_custom(self): |
@@ -355,7 +355,7 @@ def _init_reqs(self, reqs: List[Tuple]): |
def _fill_cpu_cache_to_reqs(self, req_ids):
    """Fill CPU-cache contents into the requests identified by ``req_ids``.

    Each id is looked up in ``g_infer_context.requests_mapping`` and the
    resulting request objects are handed to
    ``self.multi_level_cache_module.fill_cpu_cache_to_reqs`` while
    ``g_infer_state_lock`` is held.

    Args:
        req_ids: iterable of request ids present in
            ``g_infer_context.requests_mapping``.
    """
    # Resolve ids before taking the lock, exactly as the original did.
    req_objs: List[InferReq] = [g_infer_context.requests_mapping[req_id] for req_id in req_ids]
    g_infer_state_lock.acquire()
    try:
        self.multi_level_cache_module.fill_cpu_cache_to_reqs(reqs=req_objs)
    finally:
        # Always release: an exception in the cache module must not leave the
        # lock held, otherwise later acquirers would block indefinitely.
        g_infer_state_lock.release()
    return
361 | 361 |
|
@@ -386,7 +386,7 @@ def _get_classed_reqs( |
386 | 386 | 5. decode_reqs 需要进行decode操作的请求 |
387 | 387 | """ |
388 | 388 | if self.args.enable_cpu_cache: |
389 | | - self.multi_level_cache_manager.update_cpu_cache_task_states() |
| 389 | + self.multi_level_cache_module.update_cpu_cache_task_states() |
390 | 390 |
|
391 | 391 | if req_ids is None: |
392 | 392 | req_ids = g_infer_context.infer_req_ids |
@@ -469,7 +469,7 @@ def _get_classed_reqs( |
469 | 469 | self._pre_handle_finished_reqs(finished_reqs=finished_reqs) |
470 | 470 | # 如果使能了 cpu cache 功能,对于已经完成的请求,进行 gpu kv 卸载到 cpu cache的操作。 |
471 | 471 | if self.args.enable_cpu_cache: |
472 | | - true_finished_reqs = self.multi_level_cache_manager.handle_finished_reqs(finished_reqs=finished_reqs) |
| 472 | + true_finished_reqs = self.multi_level_cache_module.handle_finished_reqs(finished_reqs=finished_reqs) |
473 | 473 | else: |
474 | 474 | true_finished_reqs = finished_reqs |
475 | 475 |
|
|
0 commit comments