Skip to content

Commit 81cbc03

Browse files
committed
merge
2 parents c99bb46 + 1ae9cd3 commit 81cbc03

File tree

6 files changed

+29
-32
lines changed

6 files changed

+29
-32
lines changed

lightllm/models/internvl/model.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ def init_imageitem_extral_params(
6464
img.extra_params["image_patch_max_num"] = 6
6565
elif num_images > 6:
6666
img.extra_params["image_patch_max_num"] = 0
67+
img.patch_num = self.get_image_patch(img)
6768
return
6869

6970
def init_audioitem_extral_params(

lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -53,11 +53,11 @@ def context_forward(self, input_ids, infer_state: LlamaInferStateInfo, layer_wei
5353
if img["token_id"] in img_start_token_ids or img["_prefill_"] is False:
5454
continue
5555
# pull the img_embeds by uid from shm or afs
56-
if self.args.run_mode == "llm_only":
57-
data = read_afs(get_shm_name_embed(img["uuid"]))
56+
if self.args.enable_remote_vit:
57+
embed = read_afs(get_shm_name_embed(img["uuid"]))
5858
else:
59-
data = read_shm(get_shm_name_embed(img["uuid"]))
60-
img_weight.append(bytes2tensor(data).cuda().reshape(img["token_num"], -1))
59+
embed = read_shm(get_shm_name_embed(img["uuid"]))
60+
img_weight.append(bytes2tensor(embed).cuda().reshape(img["token_num"], -1))
6161
img_start_token_ids.append(img["token_id"])
6262
img_token_lens.append(img["token_num"])
6363
img_start_locs.append(img_start_loc)

lightllm/server/embed_cache/impl/memory_cache_with_redis.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,13 +36,15 @@ def release(self, ids: list[int]) -> None:
3636
self.redis_cache.decr(id_)
3737

3838
def set_items_data(self, ids: list[int]) -> None:
39-
pass
39+
for id_ in ids:
40+
self._records[id_].data = True
4041

4142
def get_items_data(self, ids: list[int]) -> list[Optional[bool]]:
4243
return [self._records.get(id_).data if id_ in self._records else False for id_ in ids]
4344

4445
def set_items_embed(self, ids: list[int]) -> None:
45-
pass
46+
for id in ids:
47+
self.redis_cache.insert(id)
4648

4749
def get_items_embed(self, ids: list[int]) -> list[Optional[bool]]:
48-
pass
50+
return [self.redis_cache.query_and_incre(id) for id in ids]

lightllm/server/httpserver/manager.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -133,9 +133,9 @@ async def _alloc_resource(self, items, md5sums, token_nums, datas):
133133
item.token_num = rec["token_num"]
134134
uid_list.append(rec["id"])
135135

136-
# If enable the vit/audio-llm disaggregation, no need to cache the data in the memory of the server
137-
if self.enable_remote_vit:
138-
return
136+
# # If enable the vit/audio-llm disaggregation, no need to cache the data in the memory of the server
137+
# if self.enable_remote_vit:
138+
# return
139139

140140
ready_flags = obtain(self.cache_client.root.get_items_data(uid_list))
141141
update_data_ids = []
@@ -159,11 +159,10 @@ async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams,
159159
items, md5sums, tokens_nums, datas = [], [], [], []
160160
for img in multimodal_params.images:
161161
self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
162-
patch_num = self.tokenizer.get_image_patch(img)
163162
data = img.read()
164163
# must come after init_imageitem_extral_params
165164
token_num = self.tokenizer.get_image_token_length(img)
166-
md5sum = "{}_{}".format(hashlib.md5(data).hexdigest(), patch_num)
165+
md5sum = "{}_{}".format(hashlib.md5(data).hexdigest(), img.patch_num)
167166
md5sums.append(md5sum)
168167
tokens_nums.append(token_num)
169168
datas.append(data)

lightllm/server/multimodal_params.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,8 +78,7 @@ def __init__(self, **kwargs):
7878
self.token_num = None
7979
self.image_w = 0
8080
self.image_h = 0
81-
self.afs_embed = False
82-
self.is_abort = False
81+
self.patch_num = 0
8382

8483
self._preload_data = None
8584
self.extra_params = {}
@@ -114,8 +113,8 @@ async def preload(self, request: Request):
114113
def read(self):
115114
assert self._preload_data is not None
116115
ans = self._preload_data
117-
# self._preload_data = None
118-
# self._data = None
116+
self._preload_data = None
117+
self._data = None
119118
return ans
120119

121120
def to_dict(self):

lightllm/server/visualserver/manager.py

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,8 @@ def __init__(
4343
self.args = args
4444
self.remote_vit = args.enable_remote_vit or args.run_mode == "visual"
4545
self.cache_port = cache_port
46+
self.visual_port = visual_port
47+
self.next_module_port = next_module_port
4648
self.waiting_reqs: List[GroupReqIndexes] = []
4749
self.infer_batch_size = args.visual_infer_batch_size
4850
self.trust_remote_code = args.trust_remote_code
@@ -151,20 +153,16 @@ async def loop_for_fwd(self):
151153
processing_group_reqs = []
152154
images_need_infer = []
153155

154-
def _recv_reqs(self):
155-
if self.remote_vit:
156-
recv_req: GroupReqIndexes = self.vit_receiver.recv_pyobj(zmq.NOBLOCK)
157-
for img in recv_req.multimodal_params.images:
158-
image_patch = self.tokenizer.get_image_patch_func(img)
159-
data = img._preload_data
160-
# img._preload_data = None
161-
md5sum = "{}_{}".format(hashlib.md5(data).hexdigest(), image_patch)
162-
md5 = int(md5sum, 16)
163-
# create_shm(get_shm_name_data(uid), data)
164-
self.cache_client.root.set_items_data([md5])
165-
return recv_req
166-
else:
167-
return self.vit_receiver.recv_pyobj(zmq.NOBLOCK)
156+
# def _recv_reqs(self):
157+
# if self.remote_vit:
158+
# recv_req: GroupReqIndexes = self.recv_from_httpserver.recv_pyobj(zmq.NOBLOCK)
159+
# recv_req.multimodal_params.images[:]= [
160+
# img for img in recv_req.multimodal_params.images
161+
# if not self.cache_client.root.get_item_embed(img.uuid) # images whose embed already exists are dropped, ref +1
162+
# ]
163+
# return recv_req
164+
# else:
165+
# return self.recv_from_httpserver.recv_pyobj(zmq.NOBLOCK)
168166

169167
async def loop_for_netio_req(self):
170168
if not hasattr(self, "visual_recv_max_count"):
@@ -199,8 +197,6 @@ async def loop_for_fwd_visual_only(self):
199197
visual_req = self.waiting_reqs.pop(0)
200198

201199
for img in visual_req.multimodal_params.images:
202-
if img.is_abort:
203-
continue
204200
images_need_infer.append(img)
205201

206202
if len(images_need_infer) == self.infer_batch_size:

0 commit comments

Comments
 (0)