We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
cos_sin_cache
Llama4VisionRotaryEmbedding
1 parent cfd302d · commit 43b752c · Copy full SHA for 43b752c
vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
@@ -59,7 +59,9 @@ def forward_native( # type: ignore[override]
59
key: Optional[torch.Tensor] = None,
60
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
61
assert key is not None
62
- self._match_cos_sin_cache_dtype(query)
+ # self.cos_sin_cache here is complex tensor so we cannot cast into
63
+ # query's dtype directly with self._match_cos_sin_cache_dtype
64
+ self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(query.device)
65
query_ = torch.view_as_complex(query.float().reshape(
66
*query.shape[:-1], -1, 2))
67
key_ = torch.view_as_complex(key.float().reshape(
0 commit comments