Skip to content

Commit 57372e5

Browse files
author
sangchengmeng
committed
[fix]0402
1 parent 01b3f68 commit 57372e5

File tree

7 files changed

+36
-25
lines changed

7 files changed

+36
-25
lines changed

lightllm/models/internvl/model.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@ def __init__(self, tokenizer, model_cfg, **kwargs):
4141
self.image_end_id = tokenizer.convert_tokens_to_ids(self.image_end_tag)
4242
self.get_image_patch_func = get_image_patch_func(kwargs["weight_dir"])
4343

44-
def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
45-
if image_max_patch_num >= 0:
46-
img.extra_params["image_patch_max_num"] = image_max_patch_num
44+
def init_imageItem_extral_params(
45+
self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
46+
):
47+
if sampling_params.image_max_patch_num >= 0:
48+
img.extra_params["image_patch_max_num"] = sampling_params.image_max_patch_num
4749
return
4850
elif os.getenv("MAX_PATCH_NUM"):
4951
img.extra_params["image_patch_max_num"] = int(os.getenv("MAX_PATCH_NUM"))

lightllm/models/llava/model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from lightllm.models.qwen_vl.layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
77
from lightllm.models.llava.layer_weights.pre_and_post_layer_weight import LlavaPreAndPostLayerWeight
88
from lightllm.server.multimodal_params import MultimodalParams, ImageItem
9+
from lightllm.server.core.objs import SamplingParams
910
from lightllm.common.build_utils import repair_config
1011
from transformers import AutoConfig
1112

@@ -33,7 +34,9 @@ def __init__(self, tokenizer, model_cfg):
3334
self.image_length = (image_size // patch_size) ** 2
3435
self.skip_start = model_cfg.get("skip_start", True)
3536

36-
def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
37+
def init_imageItem_extral_params(
38+
self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
39+
):
3740
return
3841

3942
def get_image_token_length(self, img: ImageItem):

lightllm/models/qwen2_vl/model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from transformers.feature_extraction_utils import BatchFeature
88
from transformers.image_utils import ImageInput
99
from transformers.processing_utils import ProcessorMixin
10+
from lightllm.server.core.objs import SamplingParams
1011
from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
1112
from typing import List, Optional, Union
1213
from transformers.utils import TensorType, logging
@@ -31,7 +32,9 @@ def __init__(self, tokenizer=None, image_processor=None, **kwargs):
3132
self.image_end_id = kwargs["model_cfg"]["vision_end_token_id"]
3233
self.image_token_id = kwargs["model_cfg"]["image_token_id"]
3334

34-
def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
35+
def init_imageItem_extral_params(
36+
self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
37+
):
3538
return
3639

3740
def get_image_token_length(self, img: ImageItem):

lightllm/models/qwen_vl/model.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import numpy as np
33
import unicodedata
4+
from lightllm.server.core.objs import SamplingParams
45
from lightllm.models.qwen.model import QWenTpPartModel
56
from .layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
67
from lightllm.server.multimodal_params import MultimodalParams, ImageItem
@@ -19,7 +20,9 @@ def __init__(self, tokenizer, model_cfg):
1920
# <imgpad>: 151859
2021
self.image_length = model_cfg["visual"].get("n_queries", 256)
2122

22-
def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
23+
def init_imageItem_extral_params(
24+
self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
25+
):
2326
return
2427

2528
def _list_find(self, input_list, target, start_idx):

lightllm/server/api_http.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,12 +364,22 @@ async def tokens(request: Request):
364364
try:
365365
request_dict = await request.json()
366366
prompt = request_dict.pop("text")
367-
parameters = request_dict.pop("parameters", {})
367+
sample_params_dict = request_dict.pop("parameters", {})
368+
369+
sampling_params = SamplingParams()
370+
sampling_params.init(tokenizer=g_objs.httpserver_manager.tokenizer, **sample_params_dict)
371+
sampling_params.verify()
372+
368373
multimodal_params_dict = request_dict.get("multimodal_params", {})
369374
multimodal_params = MultimodalParams(**multimodal_params_dict)
370375
multimodal_params.verify_and_preload()
371376
return JSONResponse(
372-
{"ntokens": g_objs.httpserver_manager.tokens(prompt, multimodal_params, parameters)}, status_code=200
377+
{
378+
"ntokens": g_objs.httpserver_manager.tokens(
379+
prompt, multimodal_params, sampling_params, sample_params_dict
380+
)
381+
},
382+
status_code=200,
373383
)
374384
except Exception as e:
375385
return create_error_response(HTTPStatus.EXPECTATION_FAILED, f"error: {str(e)}")

lightllm/server/embed_cache/utils.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,6 @@ def tensor2bytes(t):
1515
return buf.read()
1616

1717

18-
def image2base64(img_str: str):
19-
image_obj = Image.open(img_str)
20-
if image_obj.format is None:
21-
raise ValueError("No image format found.")
22-
buffer = BytesIO()
23-
image_obj.save(buffer, format=image_obj.format)
24-
return base64.b64encode(buffer.getvalue()).decode("utf-8")
25-
26-
2718
def bytes2tensor(b):
2819
# return torch.from_numpy(np.frombuffer(b, dtype=np.float16)).cuda()
2920
return torch.load(BytesIO(b))

lightllm/server/httpserver/manager.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ def __init__(
110110
# connect cache server, calculate md5, alloc resource, return uuid
111111
async def _alloc_resource(self, img: ImageItem):
112112
data = img.read()
113+
# must be called after init_imageItem_extral_params has populated img.extra_params
113114
num_tokens = self.tokenizer.get_image_token_length(img)
114115
md5sum = hashlib.md5(data).hexdigest() + "_" + str(hash(frozendict(img.extra_params)))
115116
wait_time = 1
@@ -127,11 +128,11 @@ async def _alloc_resource(self, img: ImageItem):
127128
await asyncio.sleep(wait_time)
128129
wait_time = min(wait_time + 2, 9)
129130

130-
async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, image_max_patch_num):
131+
async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, sampling_params: SamplingParams):
131132
# Only P and NORMAL nodes actually need to manage multimodal resources
132133
if self.pd_mode.is_P_or_NORMAL():
133134
for img in multimodal_params.images:
134-
self.tokenizer.init_imageItem_extral_params(img, multimodal_params, image_max_patch_num)
135+
self.tokenizer.init_imageItem_extral_params(img, multimodal_params, sampling_params)
135136
record = await self._alloc_resource(img)
136137
img.uuid = record["id"]
137138
img.token_id = record["token_id"]
@@ -151,15 +152,15 @@ async def _release_multimodal_resources(self, multimodal_params: MultimodalParam
151152
img.token_num = None
152153
return
153154

154-
def tokens(self, prompt, multimodal_params, kwargs=None):
155+
def tokens(self, prompt, multimodal_params, samping_params=SamplingParams, kwargs=None):
155156
kwargs = {} if kwargs is None else kwargs
156157
prompt_ids = self.tokenizer.encode(prompt, None, **kwargs)
157158
image_tokens = 0
158159
img_count = 0
159-
max_num = multimodal_params.max_num
160160
for img in multimodal_params.images:
161161
img_count += 1
162-
image_tokens += self.tokenizer.get_image_token_length(img, max_num)
162+
self.tokenizer.init_imageItem_extral_params(img, multimodal_params, samping_params)
163+
image_tokens += self.tokenizer.get_image_token_length(img)
163164
return len(prompt_ids) + image_tokens + img_count
164165

165166
async def loop_for_request(self):
@@ -307,9 +308,7 @@ async def _encode(
307308
if isinstance(prompt, str):
308309
if self.enable_multimodal:
309310
assert len(multimodal_params.images) <= self.args.cache_capacity, "too many images!"
310-
await self._alloc_multimodal_resources(
311-
multimodal_params, image_max_patch_num=sampling_params.image_max_patch_num
312-
)
311+
await self._alloc_multimodal_resources(multimodal_params, sampling_params)
313312
prompt_ids = self.tokenizer.encode(
314313
prompt, multimodal_params, add_special_tokens=sampling_params.add_special_tokens
315314
)

0 commit comments

Comments
 (0)