We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 42489e4 · commit d72b0be
vllm/_xpu_ops.py
@@ -105,9 +105,10 @@ def flash_attn_varlen_func(
105
assert len(window_size) == 2
106
real_window_size = (window_size[0], window_size[1]) # noqa: F841
107
108
- # In encode attention, v may not be contiguous and the current
+ # In encode attention, k and v may not be contiguous and the current
109
# kernel can't handle it
110
if block_table is None:
111
+ k = k.contiguous()
112
v = v.contiguous()
113
return flash_attn_varlen_func(
114
out=out,
0 commit comments