File tree Expand file tree Collapse file tree 1 file changed +5
-2
lines changed
specforge/modeling/target Expand file tree Collapse file tree 1 file changed +5
-2
lines changed Original file line number Diff line number Diff line change 88import torch .nn as nn
99from sglang .srt .configs .model_config import ModelConfig
1010from sglang .srt .layers .rotary_embedding import MRotaryEmbedding
11- from sglang .srt .managers .mm_utils import init_mm_embedding_cache
11+ from sglang .srt .managers .mm_utils import init_mm_embedding_cache , MultiModalityDataPaddingPatternMultimodalTokens
1212from sglang .srt .managers .schedule_batch import (
1313 Modality ,
1414 MultimodalDataItem ,
@@ -623,6 +623,7 @@ def extend_vlm(
623623 offsets = offset , # List of (start, end) tuples
624624 )
625625 mm_item .set ("image_grid_thw" , image_grid_thw_ .cpu ())
626+ mm_item .set_pad_value ()
626627 mm_inputs = MultimodalInputs (
627628 mm_items = [mm_item ],
628629 im_token_id = self .image_token_id ,
@@ -633,10 +634,12 @@ def extend_vlm(
633634 ),
634635 mrope_position_delta = mrope_position_delta ,
635636 )
637+ pattern = MultiModalityDataPaddingPatternMultimodalTokens ()
638+ input_id_list = pattern .pad_input_tokens (input_id_ .view (- 1 ).tolist (), mm_inputs )
636639 req = Req (
637640 rid = str (idx ),
638641 origin_input_text = "" ,
639- origin_input_ids = input_id_ . view ( - 1 ). tolist () ,
642+ origin_input_ids = input_id_list ,
640643 sampling_params = sampling_params ,
641644 )
642645 req .fill_ids = req .origin_input_ids
You can’t perform that action at this time.
0 commit comments