
Commit 0b7ca92

fix
1 parent 8e91c9c commit 0b7ca92

4 files changed (+11, -7 lines)

lightllm/server/embed_cache/copy_to_cache.py

Lines changed: 0 additions & 1 deletion
@@ -24,7 +24,6 @@ def _offload_embed_tensor_to_cache(
     dest_index = (start_index_in_cache + token_index).to(tl.int64)
 
     for layer_index in range(layer_num):
-        layer_index = layer_index.to(tl.int64)
         for block_index in range(tl.cdiv(hidden_size, BLOCK)):
            off = block_index * BLOCK + tl.arange(0, BLOCK)
            mask = off < hidden_size
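The only change in this kernel is dropping the `.to(tl.int64)` cast on the loop counter. The commit message is just "fix", so the reasoning below is an inference: 64-bit addressing is already guaranteed by casting `dest_index`, and if `layer_num` is a `tl.constexpr` (an assumption), Triton evaluates the `range()` loop at compile time with a plain Python `int` as the loop variable, which has no `.to()` method at all. A minimal illustrative kernel with the same loop shape (not the repository's kernel; names and the copy logic are made up):

import triton
import triton.language as tl


@triton.jit
def _offload_rows_sketch(src_ptr, dst_ptr, start_index_in_cache, hidden_size,
                         layer_num: tl.constexpr, BLOCK: tl.constexpr):
    token_index = tl.program_id(0)
    # the 64-bit base offset is computed once, mirroring dest_index in copy_to_cache.py
    dest_index = (start_index_in_cache + token_index).to(tl.int64)
    for layer_index in range(layer_num):
        # layer_index is a plain Python int here (constexpr bound), so no cast is needed or possible
        for block_index in range(tl.cdiv(hidden_size, BLOCK)):
            off = block_index * BLOCK + tl.arange(0, BLOCK)
            mask = off < hidden_size
            val = tl.load(src_ptr + layer_index * hidden_size + off, mask=mask)
            dst = (dest_index * layer_num + layer_index) * hidden_size + off
            tl.store(dst_ptr + dst, val, mask=mask)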

lightllm/server/embed_cache/impl/naive_memory_cache.py

Lines changed: 4 additions & 1 deletion
@@ -28,6 +28,9 @@ class Record(object):
     token_num: int
     mem_block: MemoryBlock
 
+    def __hash__(self) -> int:
+        return self.id
+
 
 class InMemoryCache:
     def __init__(self, args) -> None:

@@ -162,7 +165,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
             free_min_count=new_needed - (self.capacity - self.occupied), new_md5_dict=new_md5_dict
         )
         if len(alloc_md5_dict) == len(new_md5_dict):
-            for md5sum, mem_block in alloc_md5_dict:
+            for md5sum, mem_block in alloc_md5_dict.items():
                 token_num = new_md5_dict[md5sum]
                 uid_int = uuid.uuid1().int
                 self._check_and_set_new_id_range(token_num)
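Two separate fixes in this file. If `Record` is a `@dataclass` (which the annotated fields suggest; an assumption), the generated `__eq__` sets `__hash__` to `None`, so records cannot be placed in sets or used as dict keys until `__hash__` is defined explicitly. Independently, iterating a dict yields only its keys, so the old `for md5sum, mem_block in alloc_md5_dict:` tried to unpack an md5 string into two names. A self-contained sketch of both behaviours (field names and values are illustrative):

from dataclasses import dataclass


@dataclass
class Record:
    id: int
    token_num: int

    def __hash__(self) -> int:
        # the dataclass-generated __eq__ would otherwise set __hash__ = None;
        # an explicit __hash__ keyed on the stable id restores hashability
        return self.id


alloc_md5_dict = {"d41d8cd98f00b204e9800998ecf8427e": Record(id=1, token_num=16)}

# iterating the dict itself yields keys only, so the pre-fix loop would raise
# "ValueError: too many values to unpack (expected 2)" on the 32-char md5 string
for md5sum, record in alloc_md5_dict.items():
    print(md5sum, record.token_num)

# with __hash__ defined, records can again be tracked in sets / used as dict keys
seen = {Record(id=1, token_num=16)}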

lightllm/server/router/model_infer/infer_batch.py

Lines changed: 4 additions & 2 deletions
@@ -43,7 +43,6 @@ def register(
         radix_cache: RadixCache,
         shm_req_manager: ShmReqManager,
         vocab_size: int,
-        cpu_embed_cache_client: Optional[CpuEmbedCacheClient] = None,
     ):
         self.args = get_env_start_args()
         from lightllm.server.router.model_infer.mode_backend.base_backend import ModeBackend

@@ -58,7 +57,10 @@ def register(
         self.infer_req_ids = []
 
         self.vocab_size = vocab_size
-        self.cpu_embed_cache_client = cpu_embed_cache_client
+        return
+
+    def init_cpu_embed_cache_client(self):
+        self.cpu_embed_cache_client = CpuEmbedCacheClient(create_meta_data=False, init_shm_data=False)
         return
 
     def get_overlap_stream(self) -> torch.cuda.Stream:

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 3 additions & 3 deletions
@@ -138,6 +138,9 @@ def init_model(self, kvargs):
         self.multi_level_cache_module = MultiLevelKvCacheModule(self)
         wait_events.append(self.multi_level_cache_module)
 
+        if self.args.enable_multimodal:
+            g_infer_context.init_cpu_embed_cache_client()
+
         model_cfg, _ = PretrainedConfig.get_config_dict(self.weight_dir)
 
         model_kvargs = {

@@ -187,9 +190,6 @@ def init_model(self, kvargs):
             radix_cache=self.radix_cache,
             shm_req_manager=self.shm_req_manager,
             vocab_size=self.model.vocab_size,
-            cpu_embed_cache_client=CpuEmbedCacheClient(create_meta_data=False, init_shm_data=False)
-            if self.args.enable_multimodal
-            else None,
         )
 
         # Initialize the communication tensor used in dp mode; it is not used in non-dp mode
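The last two files are one refactor: `register()` no longer receives a pre-built client, and the backend instead calls the new `init_cpu_embed_cache_client()` during `init_model()` when multimodal mode is enabled. A reduced sketch of that wiring, with a stub standing in for the repository's `CpuEmbedCacheClient` and only the relevant lines kept (the vocab size and the stub's attributes are illustrative):

class CpuEmbedCacheClient:
    # stand-in for lightllm's client; only the constructor arguments used in the diff are modeled
    def __init__(self, create_meta_data: bool, init_shm_data: bool):
        self.create_meta_data = create_meta_data   # False: attach to existing metadata
        self.init_shm_data = init_shm_data         # False: do not (re)initialize shared-memory data


class InferContext:
    def register(self, vocab_size: int):
        # register() no longer takes a cpu_embed_cache_client argument
        self.vocab_size = vocab_size
        return

    def init_cpu_embed_cache_client(self):
        # built on demand, mirroring the method added in infer_batch.py
        self.cpu_embed_cache_client = CpuEmbedCacheClient(create_meta_data=False, init_shm_data=False)
        return


g_infer_context = InferContext()


def init_model(enable_multimodal: bool):
    # mirrors base_backend.init_model(): the client exists only in multimodal mode,
    # replacing the old conditional-expression argument passed to register()
    if enable_multimodal:
        g_infer_context.init_cpu_embed_cache_client()
    g_infer_context.register(vocab_size=32000)


init_model(enable_multimodal=True)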
