Skip to content

Commit b297fc2

Browse files
authored
bugfix: fix fp4 quantization with 8x4 scale factor layout (#1611)
1 parent eab0de4 commit b297fc2

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

csrc/nv_internal/tensorrt_llm/kernels/quantization.cuh

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -766,8 +766,9 @@ quantize_with_block_size(
766766
bool isSfSwizzledLayout = layout == QuantizationSFLayout::SWIZZLED_128x4 ||
767767
layout == QuantizationSFLayout::SWIZZLED_8x4;
768768

769-
// The number of padded rows considering 128x4 SF layout.
770-
int numPaddedRowsForSf = isSfSwizzledLayout ? PadUpFn(numRows, 128) : numRows;
769+
// The number of padded rows considering 128x4 or 8x4 SF layout.
770+
int rowTile = (layout == QuantizationSFLayout::SWIZZLED_128x4) ? 128 : 8;
771+
int numPaddedRowsForSf = isSfSwizzledLayout ? PadUpFn(numRows, rowTile) : numRows;
771772
int numColsForSf = isSfSwizzledLayout ? PadUpFn(numPaddedCols, 4 * SF_VEC_SIZE) : numPaddedCols;
772773

773774
// The number of threads in the column dimension.

0 commit comments

Comments
 (0)