Skip to content

Commit 7ffbf27

Browse files
authored
[BugFix][FlashInfer] Fix potential race condition for paged_kv_indptr_cpu (#23737)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent 27e88ce commit 7ffbf27

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

vllm/v1/attention/backends/flashinfer.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,8 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
237237
device="cpu",
238238
pin_memory=pin_memory)
239239
self.paged_kv_indptr_np = self.paged_kv_indptr_cpu.numpy()
240+
self.paged_kv_indptr_buffer = torch.zeros_like(
241+
self.paged_kv_indptr_cpu, pin_memory=pin_memory)
240242
self.paged_kv_indices_cpu = torch.zeros(max_num_pages,
241243
dtype=torch.int32,
242244
device="cpu",
@@ -361,12 +363,18 @@ def build(self,
361363
dtype=np.int32,
362364
out=self.paged_kv_indptr_np[1:num_reqs + 1],
363365
)
366+
# NOTE(woosuk): Because self.paged_kv_indptr_cpu can be modified
367+
# after this line (e.g., for cuda graphs), we need to copy the data to
368+
# self.paged_kv_indptr_buffer to avoid a race condition.
369+
self.paged_kv_indptr_buffer[:num_reqs +
370+
1] = (self.paged_kv_indptr_cpu[:num_reqs +
371+
1])
364372
paged_kv_indptr = self.paged_kv_indptr[:num_reqs + 1]
365-
paged_kv_indptr.copy_(self.paged_kv_indptr_cpu[:num_reqs + 1],
373+
paged_kv_indptr.copy_(self.paged_kv_indptr_buffer[:num_reqs + 1],
366374
non_blocking=True)
367375

368376
# write self.paged_kv_indices inplace
369-
num_actual_pages = num_blocks_np.sum().item()
377+
num_actual_pages = self.paged_kv_indptr_np[num_reqs]
370378
paged_kv_indices = self.paged_kv_indices[:num_actual_pages]
371379
_copy_page_indices_kernel[(num_reqs, )](
372380
paged_kv_indices,

0 commit comments

Comments
 (0)