Skip to content

Commit f4cdbed

Browse files
author
none
committed
fix
1 parent 5cbec55 commit f4cdbed

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

lightllm/common/basemodel/triton_kernel/kv_cache_offload.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def _offload_gpu_kv_to_cpu(
45 | 45 |   | + token_indexes[:, None] * gpu_stride1
46 | 46 |   | + head_all_dim_range[None, :]
47 | 47 |   | )
48 |    | - | gpu_data = tl.load(gpu_ptr, mask=(head_all_dim_range[:, None] < head_all_dim), other=0.0)
   | 48 | + | gpu_data = tl.load(gpu_ptr, mask=(head_all_dim_range[None, :] < head_all_dim), other=0.0)
49 | 49 |   | cpu_ptr = (
50 | 50 |   | cpu_kv_cache_ptr
51 | 51 |   | + cpu_page_index * cpu_stride0
@@ -56,7 +56,7 @@ def _offload_gpu_kv_to_cpu(
56 | 56 |   | tl.store(
57 | 57 |   | cpu_ptr,
58 | 58 |   | gpu_data,
59 |    | - | mask=(head_all_dim_range[:, None] < head_all_dim),
   | 59 | + | mask=(head_all_dim_range[None, :] < head_all_dim),
60 | 60 |   | )
61 | 61 |   | return
62 | 62 |   |

0 commit comments

Comments
 (0)