We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 75404d0 commit 5b19b93 Copy full SHA for 5b19b93
csrc/moe/moe_align_sum_kernels.cu
@@ -207,8 +207,8 @@ __global__ void sgl_moe_align_block_size_kernel(
207
__shared__ int32_t shared_counts[32][8];
208
__shared__ int32_t local_offsets[256];
209
210
- const int warp_id = threadIdx.x / WARP_SIZE;
211
- const int lane_id = threadIdx.x % WARP_SIZE;
+ const int warp_id = threadIdx.x / 32;
+ const int lane_id = threadIdx.x % 32;
212
const int experts_per_warp = 8;
213
const int my_expert_start = warp_id * experts_per_warp;
214
0 commit comments