Skip to content

Commit 89cccd8

Browse files
committed
A tile multi-tiling.
1 parent 4decb3c commit 89cccd8

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

csrc/kernels.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3061,10 +3061,10 @@ template <typename T, int BITS, int THREADS> __global__ void gemm_device(int M,
 T local_A[1];
 T local_B[32];
 
-const int a_tile_offset = (8*16 + 16);
+const int a_tile_offset = (16 + 16);
 const int b_tile_offset = (16*32 + 16);
 
-__shared__ T smem_A[2*batch_size_warps*8*16 + (2*16*(batch_size_warps-1))];
+__shared__ T smem_A[8*16 + (4*16*(batch_size_warps-1))];
 __shared__ T smem_B[2*batch_size_warps*16*32 + (2*16*(batch_size_warps-1))];
 __shared__ T smem_C[8*32];
 
0 commit comments

Comments
 (0)