@@ -1,18 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-from collections import defaultdict
 from dataclasses import dataclass
 from itertools import accumulate
-from typing import Dict, List, Optional, Tuple, Type
+from typing import List, Optional, Tuple, Type
 
 import torch
 
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionMetadata,
                                               AttentionMetadataBuilder)
 from vllm.attention.backends.utils import CommonAttentionState
-from vllm.multimodal import MultiModalPlaceholderMap
 from vllm.utils import async_tensor_h2d
 
 # Placeholder attention backend for models like Mamba and pooling models that
@@ -141,8 +139,6 @@ def prefill_metadata(self) -> Optional["PlaceholderAttentionMetadata"]:
             num_prefill_tokens=self.num_prefill_tokens,
             num_decode_tokens=0,
             slot_mapping=slot_mapping,
-            multi_modal_placeholder_index_maps=self.
-            multi_modal_placeholder_index_maps,
             enable_kv_scales_calculation=self.enable_kv_scales_calculation,
             seq_lens=seq_lens,
             seq_lens_tensor=seq_lens_tensor,
@@ -178,7 +174,6 @@ def decode_metadata(self) -> Optional["PlaceholderAttentionMetadata"]:
             num_prefill_tokens=0,
             num_decode_tokens=self.num_decode_tokens,
             slot_mapping=slot_mapping,
-            multi_modal_placeholder_index_maps=None,
             enable_kv_scales_calculation=True,
             seq_lens=None,
             seq_lens_tensor=seq_lens_tensor,
@@ -210,9 +205,6 @@ def prepare(self):
         self.prefill_seq_lens: List[int] = []
         self.context_lens: List[int] = []
         self.curr_seq_lens: List[int] = []
-        self.multimodal_placeholder_maps: Dict[
-            str,
-            MultiModalPlaceholderMap] = defaultdict(MultiModalPlaceholderMap)
         self.num_prefills = 0
         self.num_prefill_tokens = 0
         self.num_decode_tokens = 0
@@ -232,12 +224,6 @@ def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool):
             self.context_lens.append(context_len)
 
             if is_prompt:
-                mm_maps = inter_data.multi_modal_placeholder_maps
-                if mm_maps:
-                    for modality, placeholders in mm_maps.items():
-                        self.multimodal_placeholder_maps[modality].extend(
-                            placeholders)
-
                 self.num_prefills += 1
                 self.num_prefill_tokens += token_len
                 self.prefill_seq_lens.append(seq_len)
@@ -295,20 +281,13 @@ def build(self, seq_lens: List[int], query_lens: List[int],
         seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32,
                                                 device, self.runner.pin_memory)
 
-        placeholder_index_maps = {
-            modality: placeholder_map.index_map()
-            for modality, placeholder_map in
-            self.multimodal_placeholder_maps.items()
-        }
-
         # Placeholders
         slot_mapping_tensor = torch.empty(0)
         block_tables = torch.empty(0)
 
         return PlaceholderAttentionMetadata(
             num_prefills=self.num_prefills,
             slot_mapping=slot_mapping_tensor,
-            multi_modal_placeholder_index_maps=placeholder_index_maps,
             enable_kv_scales_calculation=True,
             num_prefill_tokens=self.num_prefill_tokens,
             num_decode_tokens=num_decode_tokens,
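A side note on the build() hunk above: seq_start_loc, which is handed to async_tensor_h2d, is a running prefix sum over the per-sequence lengths (hence the itertools.accumulate import kept at the top of the file). Below is a minimal standalone sketch of that prefix-sum pattern; the seq_lens values are made up for illustration, and a plain torch.tensor() stands in for the asynchronous host-to-device copy done by the real builder.

from itertools import accumulate

import torch

# Hypothetical per-sequence token counts for one batch (illustrative only).
seq_lens = [5, 3, 7]

# Prefix sums with a leading 0 give each sequence's start offset in the
# flattened token stream: [0, 5, 8, 15].
seq_start_loc = list(accumulate(seq_lens, initial=0))

# The builder moves this to the device via async_tensor_h2d; a plain int32
# tensor stands in for that here.
seq_start_loc_tensor = torch.tensor(seq_start_loc, dtype=torch.int32)
print(seq_start_loc_tensor)  # tensor([ 0,  5,  8, 15], dtype=torch.int32)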