Skip to content

Commit 17de477

Browse files
committed
fix
1 parent df8ec7f commit 17de477

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

lightllm/models/mineru2_qwen/model.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -64,18 +64,21 @@ def init_audioitem_extral_params(
6464

6565
def get_image_token_length(self, img: ImageItem):
6666
# 对于 Mineru2 集成,视觉塔返回的是每个裁剪的一条 pooled 向量。
67-
# token 数应与裁剪数量一致:anyres 模式为 1(原图)+ 网格裁剪数,否则为 1
67+
# token 数应与裁剪数量一致:anyres 模式为 1(原图)+ 网格裁剪数,且每块含双视图(factor=2)
6868
aspect = getattr(self.image_processor, "image_aspect_ratio", None)
6969
try:
7070
if aspect and (aspect == "anyres" or (isinstance(aspect, str) and "anyres_max" in aspect)):
7171
crop_size = self.image_processor.crop_size["height"]
7272
grid_w, grid_h = get_anyres_image_grid_shape(
7373
(img.image_w, img.image_h), self.image_processor.image_grid_pinpoints, crop_size
7474
)
75-
token_num = int(grid_w * grid_h + 1)
75+
base = int(grid_w * grid_h + 1)
76+
view_factor = 2 # 与 encode 中观测到的 t.shape[1]==2 对齐
77+
token_num = base * view_factor
7678
print(
7779
f"[debug] mineru2_tokenizer anyres img_size=({img.image_w},{img.image_h}) "
78-
f"crop={crop_size} grid=({grid_w},{grid_h}) token_num={token_num}"
80+
f"crop={crop_size} grid=({grid_w},{grid_h}) base={base} view_factor={view_factor}"
81+
f" token_num={token_num}"
7982
)
8083
return token_num
8184
else:

0 commit comments

Comments
 (0)