[fix]0402

sangchengmeng · sangchengmeng · commit 57372e5f55e7 · 2025-04-02T11:46:56.000+08:00
diff --git a/lightllm/models/internvl/model.py b/lightllm/models/internvl/model.py
@@ -41,9 +41,11 @@ def __init__(self, tokenizer, model_cfg, **kwargs):
         self.image_end_id = tokenizer.convert_tokens_to_ids(self.image_end_tag)
         self.get_image_patch_func = get_image_patch_func(kwargs["weight_dir"])
 
-    def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
-        if image_max_patch_num >= 0:
-            img.extra_params["image_patch_max_num"] = image_max_patch_num
+    def init_imageItem_extral_params(
+        self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
+    ):
+        if sampling_params.image_max_patch_num >= 0:
+            img.extra_params["image_patch_max_num"] = sampling_params.image_max_patch_num
             return
         elif os.getenv("MAX_PATCH_NUM"):
             img.extra_params["image_patch_max_num"] = int(os.getenv("MAX_PATCH_NUM"))
diff --git a/lightllm/models/llava/model.py b/lightllm/models/llava/model.py
@@ -6,6 +6,7 @@
 from lightllm.models.qwen_vl.layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
 from lightllm.models.llava.layer_weights.pre_and_post_layer_weight import LlavaPreAndPostLayerWeight
 from lightllm.server.multimodal_params import MultimodalParams, ImageItem
+from lightllm.server.core.objs import SamplingParams
 from lightllm.common.build_utils import repair_config
 from transformers import AutoConfig
 
@@ -33,7 +34,9 @@ def __init__(self, tokenizer, model_cfg):
         self.image_length = (image_size // patch_size) ** 2
         self.skip_start = model_cfg.get("skip_start", True)
 
-    def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
+    def init_imageItem_extral_params(
+        self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
+    ):
         return
 
     def get_image_token_length(self, img: ImageItem):
diff --git a/lightllm/models/qwen2_vl/model.py b/lightllm/models/qwen2_vl/model.py
@@ -7,6 +7,7 @@
 from transformers.feature_extraction_utils import BatchFeature
 from transformers.image_utils import ImageInput
 from transformers.processing_utils import ProcessorMixin
+from lightllm.server.core.objs import SamplingParams
 from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
 from typing import List, Optional, Union
 from transformers.utils import TensorType, logging
@@ -31,7 +32,9 @@ def __init__(self, tokenizer=None, image_processor=None, **kwargs):
         self.image_end_id = kwargs["model_cfg"]["vision_end_token_id"]
         self.image_token_id = kwargs["model_cfg"]["image_token_id"]
 
-    def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
+    def init_imageItem_extral_params(
+        self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
+    ):
         return
 
     def get_image_token_length(self, img: ImageItem):
diff --git a/lightllm/models/qwen_vl/model.py b/lightllm/models/qwen_vl/model.py
@@ -1,6 +1,7 @@
 import json
 import numpy as np
 import unicodedata
+from lightllm.server.core.objs import SamplingParams
 from lightllm.models.qwen.model import QWenTpPartModel
 from .layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
 from lightllm.server.multimodal_params import MultimodalParams, ImageItem
@@ -19,7 +20,9 @@ def __init__(self, tokenizer, model_cfg):
         # <imgpad>: 151859
         self.image_length = model_cfg["visual"].get("n_queries", 256)
 
-    def init_imageItem_extral_params(self, img: ImageItem, multi_params: MultimodalParams, image_max_patch_num: int):
+    def init_imageItem_extral_params(
+        self, img: ImageItem, multi_params: MultimodalParams, sampling_params: SamplingParams
+    ):
         return
 
     def _list_find(self, input_list, target, start_idx):
diff --git a/lightllm/server/api_http.py b/lightllm/server/api_http.py
@@ -364,12 +364,22 @@ async def tokens(request: Request):
     try:
         request_dict = await request.json()
         prompt = request_dict.pop("text")
-        parameters = request_dict.pop("parameters", {})
+        sample_params_dict = request_dict.pop("parameters", {})
+
+        sampling_params = SamplingParams()
+        sampling_params.init(tokenizer=g_objs.httpserver_manager.tokenizer, **sample_params_dict)
+        sampling_params.verify()
+
         multimodal_params_dict = request_dict.get("multimodal_params", {})
         multimodal_params = MultimodalParams(**multimodal_params_dict)
         multimodal_params.verify_and_preload()
         return JSONResponse(
-            {"ntokens": g_objs.httpserver_manager.tokens(prompt, multimodal_params, parameters)}, status_code=200
+            {
+                "ntokens": g_objs.httpserver_manager.tokens(
+                    prompt, multimodal_params, sampling_params, sample_params_dict
+                )
+            },
+            status_code=200,
         )
     except Exception as e:
         return create_error_response(HTTPStatus.EXPECTATION_FAILED, f"error: {str(e)}")
diff --git a/lightllm/server/embed_cache/utils.py b/lightllm/server/embed_cache/utils.py
@@ -15,15 +15,6 @@ def tensor2bytes(t):
     return buf.read()
 
 
-def image2base64(img_str: str):
-    image_obj = Image.open(img_str)
-    if image_obj.format is None:
-        raise ValueError("No image format found.")
-    buffer = BytesIO()
-    image_obj.save(buffer, format=image_obj.format)
-    return base64.b64encode(buffer.getvalue()).decode("utf-8")
-
-
 def bytes2tensor(b):
     # return torch.from_numpy(np.frombuffer(b, dtype=np.float16)).cuda()
     return torch.load(BytesIO(b))
diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
@@ -110,6 +110,7 @@ def __init__(
     # connect cache server, calculate md5, alloc resource, return uuid
     async def _alloc_resource(self, img: ImageItem):
         data = img.read()
+        # must be called after init_imageItem_extral_params
         num_tokens = self.tokenizer.get_image_token_length(img)
         md5sum = hashlib.md5(data).hexdigest() + "_" + str(hash(frozendict(img.extra_params)))
         wait_time = 1
@@ -127,11 +128,11 @@ async def _alloc_resource(self, img: ImageItem):
                 await asyncio.sleep(wait_time)
                 wait_time = min(wait_time + 2, 9)
 
-    async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, image_max_patch_num):
+    async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, sampling_params: SamplingParams):
         # 只有 P 和 NORMAL 节点需要真的管理多模态资源
         if self.pd_mode.is_P_or_NORMAL():
             for img in multimodal_params.images:
-                self.tokenizer.init_imageItem_extral_params(img, multimodal_params, image_max_patch_num)
+                self.tokenizer.init_imageItem_extral_params(img, multimodal_params, sampling_params)
                 record = await self._alloc_resource(img)
                 img.uuid = record["id"]
                 img.token_id = record["token_id"]
@@ -151,15 +152,15 @@ async def _release_multimodal_resources(self, multimodal_params: MultimodalParam
                         img.token_num = None
         return
 
-    def tokens(self, prompt, multimodal_params, kwargs=None):
+    def tokens(self, prompt, multimodal_params, samping_params: SamplingParams = None, kwargs=None):
         kwargs = {} if kwargs is None else kwargs
         prompt_ids = self.tokenizer.encode(prompt, None, **kwargs)
         image_tokens = 0
         img_count = 0
-        max_num = multimodal_params.max_num
         for img in multimodal_params.images:
             img_count += 1
-            image_tokens += self.tokenizer.get_image_token_length(img, max_num)
+            self.tokenizer.init_imageItem_extral_params(img, multimodal_params, samping_params)
+            image_tokens += self.tokenizer.get_image_token_length(img)
         return len(prompt_ids) + image_tokens + img_count
 
     async def loop_for_request(self):
@@ -307,9 +308,7 @@ async def _encode(
         if isinstance(prompt, str):
             if self.enable_multimodal:
                 assert len(multimodal_params.images) <= self.args.cache_capacity, "too many images!"
-                await self._alloc_multimodal_resources(
-                    multimodal_params, image_max_patch_num=sampling_params.image_max_patch_num
-                )
+                await self._alloc_multimodal_resources(multimodal_params, sampling_params)
                 prompt_ids = self.tokenizer.encode(
                     prompt, multimodal_params, add_special_tokens=sampling_params.add_special_tokens
                 )