We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent da41328, commit b9f430f. Copy full SHA for b9f430f
kernels/mat-transpose/mat_transpose.cu
@@ -310,7 +310,7 @@ __global__ void mat_transpose_f32x4_shared_bcf_row2col2d_kernel(
310
const int N = x.size(1); \
311
dim3 block(WARP_SIZE_S, WARP_SIZE_S); \
312
dim3 grid((N + WARP_SIZE_S - 1) / (WARP_SIZE_S * n_element_col), \
313
- (M + WARP_SIZE_S - 1) / (WARP_SIZE_S / n_element_row)); \
+ (M + WARP_SIZE_S - 1) / (WARP_SIZE_S * n_element_row)); \
314
mat_transpose_##tag##2d_kernel <<<grid, block>>>( \
315
reinterpret_cast<element_type *>(x.data_ptr()), \
316
reinterpret_cast<element_type *>(y.data_ptr()), M, N); \
0 commit comments