Skip to content

Commit b38ed29

Browse files
committed
Bug 5412815: Fix the block_tile_j computation in cudaTensorCoreGemm.cu
1 parent 4a631c9 commit b38ed29

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ __global__ void compute_gemm(const half *A, const half *B, const float *C, float
224 224
// there's no such tile, all warps in this CTA exit.
225 225
for (unsigned int block_pos = blockIdx.x;; block_pos += gridDim.x) {
226 226
const unsigned int block_tile_i = ((block_pos * BLOCK_ROW_TILES) / N_TILES) * (BLOCK_COL_TILES);
227-
const unsigned int block_tile_j = (block_pos * BLOCK_COL_TILES) % N_TILES;
227+
const unsigned int block_tile_j = (block_pos * BLOCK_ROW_TILES) % N_TILES;
228 228

229 229
// Stop when there are no more D matrix tiles to compute in this CTA.
230 230
if (block_tile_i >= M_TILES) {

0 commit comments

Comments
 (0)