Skip to content

Commit 68a43d8

Browse files
[LoadOpToBlockIOConversion] Limit vBlocks of 2d block load (#5313)
Fixes #5308, #5243. Flex Attn UT CI: https://github.com/intel/intel-xpu-backend-for-triton/actions/runs/18510656080 (GOOD). Signed-off-by: Whitney Tsang <[email protected]>
1 parent 080f2aa commit 68a43d8

File tree

2 files changed

+9
-0
lines changed

2 files changed

+9
-0
lines changed

third_party/intel/lib/TritonGENToLLVM/TritonGENToLLVMPass.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ loadCacheControlToCacheControls(Builder &builder,
115115
static bool isSPVBuiltinAvailable(TritonGEN::Matrix2DBlockLoadOp op) {
116116
// FIXME: The following signatures are not valid in SPV interface.
117117

118+
// intel_sub_group_2d_block_read_8b_1r16x4c
119+
if (op.getElemSizeInBits() == 8 && op.getTileHeight() == 1 &&
120+
op.getTileWidth() == 16 && op.getVBlocks() == 1 && !op.getVnniTransform())
121+
return false;
122+
118123
// intel_sub_group_2d_block_read_8b_8r8x1c
119124
if (op.getElemSizeInBits() == 8 && op.getTileHeight() == 8 &&
120125
op.getTileWidth() == 8 && op.getVBlocks() == 1 && !op.getVnniTransform())

third_party/intel/lib/TritonIntelGPUToLLVM/LoadStoreOpToLLVM.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2500,6 +2500,10 @@ struct LoadOpToBlockIOConversion
25002500
static_cast<int>(MAX_WIDTH / totalBytesPerRowPerMatrix));
25012501
// vBlocks has HW limitation of 4.
25022502
vBlocks = std::min(vBlocks, 4);
2503+
// Limit vBlocks to 1 if block size is smaller than GRF size.
2504+
const unsigned GRF_SIZE = 64;
2505+
if (tileHeight * tileWidth * packedElemSizeInBits / 8 < GRF_SIZE)
2506+
vBlocks = 1;
25032507

25042508
// TODO: use the axis info to generalize the handling for both regular pointer
25052509
// and block pointer.

0 commit comments

Comments
 (0)