[Performance][MM] Building the inverse permutation in O(n) time in Qwen2_5_VisionTransformer (vllm-project#24443)

david6666666 · Junhong · skyloevil · commit 72a1c891b593 · 2025-09-13T12:40:21.000+08:00
Signed-off-by: Junhong <liujunhong11@huawei.com>
Co-authored-by: Junhong <liujunhong11@huawei.com>
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
@@ -717,6 +717,15 @@ def compute_attn_mask_seqlen(
             seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
         return max_seqlen, seqlens
 
+    @staticmethod
+    def invert_permutation(perm: torch.Tensor) -> torch.Tensor:
+        # building the inverse permutation in O(n) time
+        inv = torch.empty_like(perm)
+        inv[perm] = torch.arange(perm.numel(),
+                                 device=perm.device,
+                                 dtype=perm.dtype)
+        return inv
+
     def forward(
         self,
         x: torch.Tensor,
@@ -760,6 +769,8 @@ def forward(
 
         rotary_pos_emb = torch.cat(rotary_pos_emb)
         window_index = torch.cat(window_index)
+        # compute reverse indices
+        reverse_indices = self.invert_permutation(window_index)
         cu_window_seqlens = torch.cat(cu_window_seqlens)
         cu_window_seqlens = torch.unique_consecutive(cu_window_seqlens)
         cu_seqlens = torch.cat(cu_seqlens)
@@ -813,7 +824,6 @@ def forward(
 
         # adapter
         hidden_states = self.merger(hidden_states)
-        reverse_indices = torch.argsort(window_index)
         hidden_states = hidden_states[reverse_indices, :]
         return hidden_states