Skip to content

Commit fece864

Browse files
committed
hack: unroll(4)
1 parent 8c719e6 commit fece864

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

csrc/fused_moe/cutlass_backend/cutlass_fused_moe_kernels.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -894,7 +894,7 @@ __device__ inline int64_t findTotalEltsLessThanTarget_v2(T const* sorted_indices
894 894   const int lane_id = threadIdx.x & (WARP_SZ - 1);
895 895
896 896   int local_count = 0;
897     - #pragma unroll
    897 + #pragma unroll(4)
898 898   for (int k = 0; k < arr_length / WARP_SZ; ++k) {
899 899   const int idx = lane_id + k * WARP_SZ;
900 900   T v = sorted_indices[idx];

0 commit comments

Comments
 (0)