
Commit 93625f0

fix
1 parent 16e5d6c commit 93625f0

2 files changed (+13, -34 lines)

lightllm/models/mineru2_qwen/model.py

Lines changed: 13 additions & 33 deletions
@@ -8,8 +8,6 @@
 from lightllm.models.registry import ModelRegistry
 from lightllm.models.qwen2.model import Qwen2TpPartModel
 from lightllm.models.qwen_vl.layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
-from lightllm.models.internvl.layer_weights.pre_and_post_layer_weight import InternVLLlamaPreAndPostLayerWeight
-from lightllm.models.internvl.img_process import get_image_patch
 
 from ..mineru2_qwen.image_processing_mineru2 import Mineru2ImageProcessor
 from .image_processing_mineru2 import get_anyres_image_grid_shape
@@ -65,34 +63,20 @@ def init_audioitem_extral_params(
     def get_image_token_length(self, img: ImageItem):
         # switch back to a patch sequence: total tokens = number of views × patches per view
         # patches per view = self.image_length = (image_size // patch_size) ** 2
-        aspect = getattr(self.image_processor, "image_aspect_ratio", None)
         patch_len = int(self.image_length)
-        try:
-            if aspect and (aspect == "anyres" or (isinstance(aspect, str) and "anyres_max" in aspect)):
-                crop_size = self.image_processor.crop_size["height"]
-                grid_w, grid_h = get_anyres_image_grid_shape(
-                    (img.image_w, img.image_h), self.image_processor.image_grid_pinpoints, crop_size
-                )
-                views = int(grid_w * grid_h + 1)
-                token_num = views * patch_len
-                print(
-                    f"[debug] mineru2_tokenizer anyres img_size=({img.image_w},{img.image_h}) "
-                    f"crop={crop_size} grid=({grid_w},{grid_h}) views={views}"
-                    f" patch_len={patch_len} token_num={token_num}"
-                )
-                return token_num
-            else:
-                token_num = patch_len
-                print(
-                    f"[debug] mineru2_tokenizer non-anyres views=1 patch_len={patch_len}"
-                    f" token_num={token_num} aspect={aspect}"
-                )
-                return token_num
-        except Exception as e:
-            # fallback: return a single view's worth of tokens
-            token_num = patch_len
-            print(f"[debug] mineru2_tokenizer token_num_fallback due to {e}, return {token_num}")
-            return token_num
+
+        crop_size = self.image_processor.crop_size["height"]
+        grid_w, grid_h = get_anyres_image_grid_shape(
+            (img.image_w, img.image_h), self.image_processor.image_grid_pinpoints, crop_size
+        )
+        views = int(grid_w * grid_h + 1)
+        token_num = views * patch_len
+        print(
+            f"[debug] mineru2_tokenizer anyres img_size=({img.image_w},{img.image_h}) "
+            f"crop={crop_size} grid=({grid_w},{grid_h}) views={views}"
+            f" patch_len={patch_len} token_num={token_num}"
+        )
+        return token_num
 
     def get_audio_token_length(self, audio: AudioItem):
         raise NotImplementedError
@@ -132,15 +116,11 @@ def encode(self, prompt, multimodal_params: MultimodalParams = None, add_special
         if image_id < len(multimodal_params.images):
             print(f"[warning] mineru2_tokenizer unused images: {len(multimodal_params.images) - image_id}")
 
-        print(f"[debug] mineru2_tokenizer input_ids={input_ids}")
         return input_ids
 
 
 @ModelRegistry("mineru2_qwen", is_multimodal=True)
 class Mineru2QwenForCausalLM(Qwen2TpPartModel):
-    # weight class
-    # pre_and_post_weight_class = InternVLLlamaPreAndPostLayerWeight
-
     # infer class
     pre_layer_infer_class = LlamaMultimodalPreLayerInfer
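
For reference, the simplified get_image_token_length always takes the anyres path: get_anyres_image_grid_shape picks a tile grid for the input resolution, one global view is added on top of the grid_w × grid_h tiles, and the result is multiplied by the per-view patch count. The sketch below is a minimal, self-contained reproduction of that arithmetic; best_resolution is a hypothetical, simplified stand-in for the resolution selection performed inside get_anyres_image_grid_shape, and the pinpoint, crop, and patch values are illustrative only, not taken from this repository.

from typing import List, Tuple


def best_resolution(image_size: Tuple[int, int],
                    pinpoints: List[Tuple[int, int]]) -> Tuple[int, int]:
    # Simplified heuristic: pick the candidate resolution that wastes the
    # least area once the image is scaled to fit inside it.
    img_w, img_h = image_size
    best, best_waste = pinpoints[0], float("inf")
    for cand_w, cand_h in pinpoints:
        scale = min(cand_w / img_w, cand_h / img_h)
        wasted = cand_w * cand_h - (img_w * scale) * (img_h * scale)
        if wasted < best_waste:
            best, best_waste = (cand_w, cand_h), wasted
    return best


def anyres_image_token_length(image_size: Tuple[int, int],
                              pinpoints: List[Tuple[int, int]],
                              crop_size: int,
                              patch_len: int) -> int:
    # Total image tokens = views * patches-per-view, where views is the
    # number of crop_size x crop_size tiles in the chosen grid plus one
    # global (base) view.
    res_w, res_h = best_resolution(image_size, pinpoints)
    grid_w, grid_h = res_w // crop_size, res_h // crop_size
    views = grid_w * grid_h + 1
    return views * patch_len


if __name__ == "__main__":
    # Illustrative values: 336px crops, (336 // 14) ** 2 = 576 patches per view.
    pinpoints = [(336, 672), (672, 336), (672, 672), (1008, 336), (336, 1008)]
    print(anyres_image_token_length((1000, 700), pinpoints, 336, 576))

With these illustrative numbers, a 1000×700 image selects the 672×336 candidate, giving a 2×1 grid, 3 views, and 3 × 576 = 1728 image tokens.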

lightllm/server/httpserver/manager.py

Lines changed: 0 additions & 1 deletion
@@ -286,7 +286,6 @@ async def generate(
         )
         # assign image ids
         prompt_ids = await self._encode(prompt, multimodal_params, sampling_params)
-        print(f"[debug] generate prompt_ids: {prompt_ids}")
         prompt_tokens = len(prompt_ids)
         # monitoring
         if group_request_id > 0:
