We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 42489e4 · commit d72b0be
vllm/_xpu_ops.py
@@ -105,9 +105,10 @@ def flash_attn_varlen_func(
105
assert len(window_size) == 2
106
real_window_size = (window_size[0], window_size[1]) # noqa: F841
107
108
- # In encode attention, v may not be contiguous and the current
+ # In encode attention, k and v may not be contiguous and the current
109
# kernel can't handle it
110
if block_table is None:
111
+ k = k.contiguous()
112
v = v.contiguous()
113
return flash_attn_varlen_func(
114
out=out,
0 commit comments