Skip to content

Commit 1ae95d8

Browse files
committed
fix
1 parent 3dcb861 commit 1ae95d8

File tree

2 files changed

+23
-3
lines changed

2 files changed

+23
-3
lines changed

lightllm/models/mineru2_qwen/mineru2_visual.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,9 @@ def cuda(self):
8585
return self
8686

8787
def forward(self, x):
88-
return self.projector(self.vision_tower(x))
88+
vision_out = self.vision_tower(x)
89+
pooled = vision_out.pooler_output
90+
return self.projector(pooled)
8991

9092
def encode(self, images: List[ImageItem]):
9193
img_tensors = []
@@ -112,6 +114,7 @@ def encode(self, images: List[ImageItem]):
112114
return None
113115

114116
img = torch.cat(img_tensors, dim=0)
117+
img = img.cuda()
115118
all_img_embeds = self.forward(img)
116119

117120
return all_img_embeds, uuids, valid_ids

lightllm/models/mineru2_qwen/model.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,27 @@ def get_audio_token_length(self, audio: AudioItem):
5454

5555
# only change the impl of the encode func:
5656
def encode(self, prompt, multimodal_params: MultimodalParams = None, add_special_tokens: bool = True):
57+
image_token_id = getattr(self, "image_token_index", 151646)
58+
image_token = self.image_token
5759

58-
origin_ids = self.tokenizer.encode(prompt)
60+
text_parts = prompt.split(image_token)
61+
token_ids = []
62+
image_offsets = []
63+
offset = 0
64+
for i, part in enumerate(text_parts):
65+
part_ids = self.tokenizer.encode(part, add_special_tokens=(add_special_tokens if i == 0 else False))
66+
token_ids.extend(part_ids)
67+
offset += len(part_ids)
68+
if i < len(text_parts) - 1:
69+
token_ids.append(image_token_id)
70+
image_offsets.append(offset)
71+
offset += 1
5972

60-
return origin_ids
73+
# Record image_offsets to make post-processing easier
74+
if multimodal_params is not None:
75+
multimodal_params.image_offsets = image_offsets
76+
# multimodal_params.image_pad_len can be filled in during post-processing
77+
return token_ids
6178

6279

6380
@ModelRegistry("mineru2_qwen", is_multimodal=True)

0 commit comments

Comments
 (0)