Commit 691b89c

fix

1 parent d498aaf commit 691b89c

8 files changed: +16 -20 lines changed
lightllm/common/image_cache_manager.py

Lines changed: 3 additions & 1 deletion

@@ -58,7 +58,7 @@ def filter(self, uuid_list):
         """
         for uuid in uuid_list:
             if uuid in self._gpu_cache:
-                embed_cpu = self._gpu_cache[uuid].cpu(non_blocking=True)
+                embed_cpu = self._gpu_cache[uuid].cpu()
                 # Move to CPU cache and remove from GPU cache
                 self._gpu_cache.pop(uuid)
                 if uuid in self._cpu_cache:
@@ -68,6 +68,8 @@ def filter(self, uuid_list):
                 self._cpu_cache.popitem(last=False)
             elif uuid in self._cpu_cache:
                 self._cpu_cache.move_to_end(uuid)
+        print(self._gpu_cache.keys())
+        print(self._cpu_cache.keys())
         return
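The substantive fix in this hunk is the switch from `.cpu(non_blocking=True)` to a plain `.cpu()`. A device-to-host copy with `non_blocking=True` can return before the data has actually landed in host memory (the overlap is only well-defined with a pinned destination plus explicit synchronization), so the tensor stashed in `_cpu_cache` could be read while still incomplete. A minimal sketch of the distinction, with made-up shapes, assuming a CUDA device is available:

import torch

gpu_embed = torch.randn(256, 4096, device="cuda")  # stand-in for a cached image embed

# Risky: may return before the device-to-host copy finishes, so a reader of
# the cached tensor can observe incomplete data.
risky = gpu_embed.to("cpu", non_blocking=True)

# The fix: a synchronous copy that is valid as soon as it returns.
safe = gpu_embed.cpu()

# If asynchronous offload were actually needed, the usual pattern is a pinned
# destination plus an explicit synchronization before first use:
pinned = torch.empty(gpu_embed.shape, dtype=gpu_embed.dtype, pin_memory=True)
pinned.copy_(gpu_embed, non_blocking=True)
torch.cuda.synchronize()  # only after this is `pinned` guaranteed valid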

lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py

Lines changed: 0 additions & 1 deletion

@@ -68,7 +68,6 @@ def context_forward(self, input_ids, infer_state: LlamaInferStateInfo, layer_wei
             if self.disable_extra_process_for_multimodal:
                 img_embed = image_cache_manager.get_embed(img["uuid"])
                 img_weight.append(img_embed.reshape(img["token_num"], -1))
-                print(img_weight[-1].shape)
             else:
                 data = read_shm(get_shm_name_embed(img["uuid"]))
                 img_weight.append(bytes2tensor(data).cuda().reshape(img["token_num"], -1))

lightllm/models/vit/layer_infer/pre_layer_infer.py

Lines changed: 1 addition & 0 deletions

@@ -16,6 +16,7 @@ def __init__(self, network_config, mode):
         self.tp_world_size_ = get_dp_world_size()
         self.network_config_ = network_config
         self.mode = mode
+        print(f"tp_rank_: {self.tp_rank_}, tp_world_size_: {self.tp_world_size_}")
         return

     def forward(self, pixel_values, layer_weight: ViTPreAndPostLayerWeight):

lightllm/models/vit/layer_infer/transformer_layer_infer.py

Lines changed: 1 addition & 0 deletions

@@ -21,6 +21,7 @@ class ViTTransformerLayerInfer:
     def __init__(self, layer_num, network_config, mode=[]):
         self.tp_rank_ = get_current_rank_in_dp()
         self.tp_world_size_ = get_dp_world_size()
+        print(f"tp_rank_: {self.tp_rank_}, tp_world_size_: {self.tp_world_size_}")
         self.eps_ = network_config["layer_norm_eps"]
         self.head_num = network_config["num_attention_heads"]
         self.tp_padding_head_num = network_config["padding_head_num"] // self.tp_world_size_
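For context on the last line of that hunk: the ViT attention heads are split across tensor-parallel ranks, and `padding_head_num` is presumably the head count rounded up to a multiple of the TP world size so the division is exact. A hypothetical sketch of that arithmetic (the helper below is illustrative, not lightllm's API):

def padded_heads_per_rank(num_heads: int, tp_world_size: int) -> int:
    # Round the head count up to a multiple of tp_world_size, then split evenly.
    padding_head_num = ((num_heads + tp_world_size - 1) // tp_world_size) * tp_world_size
    return padding_head_num // tp_world_size

# e.g. 25 heads on 4 ranks -> padded to 28 -> 7 heads per rank
assert padded_heads_per_rank(25, 4) == 7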

lightllm/models/vit/layer_weights/pre_and_post_layer_weight.py

Lines changed: 4 additions & 7 deletions

@@ -5,10 +5,9 @@
 from lightllm.common.basemodel import PreAndPostLayerWeight
 from lightllm.utils.dist_utils import (
     get_current_device_id,
-    get_global_rank,
-    get_global_world_size,
+    get_current_rank_in_dp,
+    get_dp_world_size,
 )
-from lightllm.utils.envs_utils import get_env_start_args


 class ViTPreAndPostLayerWeight(PreAndPostLayerWeight):
@@ -18,10 +17,8 @@ def __init__(self, data_type, network_config, mode):
         self.image_size = self.network_config_["image_size"]
         self.patch_size = self.network_config_["patch_size"]
         self.llm_hidden_size = self.network_config_["llm_hidden_size"]
-        if get_env_start_args().disable_extra_process_for_multimodal:
-            self.tp_world_size_ = get_global_world_size()
-            self.tp_rank_ = get_global_rank()
-
+        self.tp_rank_ = get_current_rank_in_dp()
+        self.tp_world_size_ = get_dp_world_size()
         return

     def _cuda(self, cpu_tensor):
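The pattern repeated across these weight files replaces the `disable_extra_process_for_multimodal` special case (global rank and world size) with the rank-within-data-parallel-group helpers, so the ViT weights are always partitioned by a rank's position inside its DP group. A sketch of the index arithmetic this relies on, assuming ranks are grouped contiguously by DP group (the helper below is illustrative, not lightllm's implementation):

def rank_in_dp(global_rank: int, dp_world_size: int) -> int:
    # With contiguous grouping, ranks [0, dp_world_size) form DP group 0,
    # [dp_world_size, 2 * dp_world_size) form group 1, and so on.
    return global_rank % dp_world_size

# e.g. 8 GPUs split into 2 DP groups of 4 tensor-parallel ranks each:
# global rank 5 sits in group 1 and holds TP shard 1 of that group's ViT weights.
assert rank_in_dp(5, dp_world_size=4) == 1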

lightllm/models/vit/layer_weights/transformer_layer_weight.py

Lines changed: 0 additions & 6 deletions

@@ -13,18 +13,12 @@
 )
 from lightllm.utils.dist_utils import (
     get_current_device_id,
-    get_global_rank,
-    get_global_world_size,
 )
-from lightllm.utils.envs_utils import get_env_start_args


 class ViTTransformerLayerWeight(TransformerLayerWeight):
     def __init__(self, layer_num, data_type, network_config, mode=[], quant_cfg=None):
         super().__init__(layer_num, data_type, network_config, mode, quant_cfg)
-        if get_env_start_args().disable_extra_process_for_multimodal:
-            self.tp_world_size_ = get_global_world_size()
-            self.tp_rank_ = get_global_rank()
         return

     def _cuda(self, cpu_tensor):

lightllm/models/vit/model.py

Lines changed: 1 addition & 5 deletions

@@ -38,11 +38,7 @@ class VisionTransformer:
     post_layer_infer_class = ViTPostLayerInfer

     def __init__(self, kvargs):
-        if get_env_start_args().disable_extra_process_for_multimodal:
-            # if we don't assign an extra process for visual model, the visual model uses tensor parallel by default.
-            self.tp_world_size_ = get_global_world_size()
-        else:
-            self.tp_world_size_ = get_dp_world_size()
+        self.tp_world_size_ = get_dp_world_size()
         self.weight_dir_ = kvargs["weight_dir"]
         self.load_way = kvargs.get("load_way", "HF")
         self.mode = [m.replace("int4weight", "w4a16").replace("int8weight", "w8a16") for m in kvargs.get("mode", [])]

lightllm/server/router/model_infer/infer_batch.py

Lines changed: 6 additions & 0 deletions

@@ -9,6 +9,7 @@
 from dataclasses import dataclass, field
 from typing import List, Dict, Tuple, Optional, Union, Any
 from lightllm.common.req_manager import ReqManager
+from lightllm.common.image_cache_manager import image_cache_manager
 from lightllm.utils.infer_utils import mark_start, mark_end
 from lightllm.server.core.objs import Req, SamplingParams, FinishStatus, ShmReqManager
 from lightllm.server.router.dynamic_prompt.radix_cache import RadixCache, TreeNode
@@ -131,6 +132,7 @@ def filter(self, finished_request_ids: List[int]):

         free_req_index = []
         free_token_index = []
+        image_uuid_list = []
         for request_id in finished_request_ids:
             req: InferReq = self.requests_mapping.pop(request_id)
             group_req_id = convert_sub_id_to_group_id(req.shm_req.request_id)
@@ -145,6 +147,10 @@ def filter(self, finished_request_ids: List[int]):
             # logger.info(f"infer release req id {req.shm_req.request_id}")
             req.shm_req.shm_infer_released = True
             self.shm_req_manager.put_back_req_obj(req.shm_req)
+            if req.multimodal_params is not None and get_env_start_args().disable_extra_process_for_multimodal:
+                for img in req.multimodal_params["images"]:
+                    image_uuid_list.append(img["uuid"])
+        image_cache_manager.filter(image_uuid_list)

         free_token_index = custom_cat(free_token_index)
         self.req_manager.free(free_req_index, free_token_index)
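The new bookkeeping in `filter` collects the image uuids of finished requests and hands them to `image_cache_manager.filter`, which (per the first file in this commit) demotes those entries from the GPU cache into a bounded CPU-side LRU. A self-contained sketch of that two-tier structure, with an invented capacity and without the real manager's locking or tensor handling:

from collections import OrderedDict

class TwoTierImageCache:
    def __init__(self, cpu_capacity: int = 8):  # capacity value is invented here
        self._gpu_cache: OrderedDict = OrderedDict()
        self._cpu_cache: OrderedDict = OrderedDict()
        self.cpu_capacity = cpu_capacity

    def filter(self, uuid_list):
        for uuid in uuid_list:
            if uuid in self._gpu_cache:
                # Demote: move the embed to the CPU tier, drop the GPU copy.
                self._cpu_cache[uuid] = self._gpu_cache.pop(uuid)
                if len(self._cpu_cache) > self.cpu_capacity:
                    self._cpu_cache.popitem(last=False)  # evict least recently used
            elif uuid in self._cpu_cache:
                self._cpu_cache.move_to_end(uuid)  # refresh its LRU position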
