Skip to content

Commit 369f47a

Browse files
[DeepSeek v3.2] Remove unnecessary syncwarps (#31047)
Signed-off-by: Matthew Bonanni <[email protected]>
1 parent dabff12 commit 369f47a

File tree

1 file changed

+1
-6
lines changed

1 file changed

+1
-6
lines changed

csrc/cache_kernels.cu

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -451,9 +451,6 @@ __global__ void indexer_k_quant_and_cache_kernel(
451 451
for (int i = 0; i < VEC_SIZE; i++) {
452 452
amax = fmaxf(amax, fabsf(float(k_val_ptr[i])));
453 453
}
454-
#ifndef USE_ROCM
455-
__syncwarp();
456-
#endif
457 454

458 455
// Reduced amax
459 456
for (int mask = 16; mask > 0; mask /= 2) {
@@ -463,9 +460,7 @@ __global__ void indexer_k_quant_and_cache_kernel(
463 460
amax = fmaxf(amax, __shfl_xor_sync(unsigned(-1), amax, mask));
464 461
#endif
465 462
}
466-
#ifndef USE_ROCM
467-
__syncwarp();
468-
#endif
463+
469 464
#if defined(__gfx942__)
470 465
float scale = fmaxf(amax, 1e-4) / 224.0f;
471 466
#else

0 commit comments

Comments
 (0)