Skip to content

Commit e88456e

Browse files
committed
Revert comment changes from previous PR for consistency
1 parent c6ba287 commit e88456e

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

csrc/kernels.hip

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2613,9 +2613,9 @@ template <typename T, int THREADS, int BITS> __global__ void kgemm_4bit_inferenc
26132613
{
26142614

26152615
// per threadblock:
2616-
// load step-by-step in chunks of [BNB_WARP_SIZE,warps]: 1xBNB_WARP_SIZE * [BNB_WARP_SIZE,warps] -> [1,warps]
2616+
// load step-by-step in chunks of [warp_size,warps]: 1xwarp_size * [warp_size,warps] -> [1,warps]
26172617
// 4 warps -> 4 loads per iter
2618-
// 1 x BNB_WARP_SIZE * BNB_WARP_SIZE x 4 -> 1x4 outputs per thread block
2618+
// 1xwarp_size * warp_sizex4 -> 1x4 outputs per thread block
26192619
typedef hipcub::WarpReduce<float, BNB_WARP_SIZE> WarpReduce;
26202620
__shared__ typename WarpReduce::TempStorage temp_storage[THREADS/BNB_WARP_SIZE];
26212621

0 commit comments

Comments
 (0)