
Commit f9737f9

jtuyls authored and pstarkcdpr committed
[Codegen] Remove batch size in target intrinsic checks (iree-org#22289)
Conceptually, we shouldn't depend on batch size information when checking whether to target an MFMA intrinsic, as the batch size doesn't change whether a matmul-like operation is compute or memory bound.

Signed-off-by: Jorn Tuyls <[email protected]>
1 parent 84c77f8 · commit f9737f9
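
For intuition only (not part of the commit): a minimal C++ sketch, assuming a dense batched matmul of shape BxMxNxK whose operands and result each move through global memory exactly once. Under that assumption the arithmetic intensity (FLOPs per byte) is independent of the batch size B, which is the reasoning behind dropping batch size from the intrinsic check. The helper name and the memory model are illustrative assumptions, not IREE code.

#include <cstdint>
#include <cstdio>

// Illustrative helper (not from the IREE codebase): estimates the arithmetic
// intensity of a dense batched matmul of shape BxMxNxK, assuming each operand
// and the result move through global memory exactly once.
static double arithmeticIntensity(int64_t b, int64_t m, int64_t n, int64_t k,
                                  int64_t elemBytes) {
  double flops = 2.0 * b * m * n * k;  // 2 flops per multiply-accumulate
  double bytes =
      static_cast<double>(b) * elemBytes * (m * k + k * n + m * n);
  return flops / bytes;
}

int main() {
  // The batch dimension scales FLOPs and bytes by the same factor, so both
  // calls print the same intensity (shape borrowed from the updated test below).
  std::printf("B=1:  %.3f flops/byte\n", arithmeticIntensity(1, 8, 577, 577, 4));
  std::printf("B=12: %.3f flops/byte\n", arithmeticIntensity(12, 8, 577, 577, 4));
  return 0;
}

Both calls print roughly 3.9 flops/byte, so whether the operation counts as compute or memory bound shouldn't hinge on B.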

File tree: 3 files changed, +6 −11 lines

compiler/src/iree/compiler/Codegen/Common/GPU/GPUHeuristics.cpp

Lines changed: 0 additions & 5 deletions
@@ -260,11 +260,6 @@ static LogicalResult canTargetIntrinsic(const GPUMatmulShapeType &problem,
   // remove this todo.
   const int64_t mSize = llvm::product_of(problem.mSizes);
   const int64_t nSize = llvm::product_of(problem.nSizes);
-  // TODO(jornt): Remove this check as batch size doesn't make a computation
-  // more compute bound, so it shouldn't be considered.
-  if (!problem.batchSizes.empty()) {
-    return success();
-  }
   if ((mSize <= kVerySkinnyDimThreshold && (nSize > preferredSubgroupSize)) ||
       (nSize <= kVerySkinnyDimThreshold && (mSize > preferredSubgroupSize))) {
     return failure();
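
With the early return gone, batched matmuls now fall through to the very-skinny-dimension check kept above. A standalone C++ sketch of the effect (the constant values below are assumptions for illustration; the actual ones live in GPUHeuristics.cpp and may differ): with these values, the old 12x2x577x577 test shape (M = 2) is rejected for an MFMA intrinsic while M = 8 is accepted, which is consistent with the test below bumping its M dimension from 2 to 8.

#include <cstdint>
#include <cstdio>

// Assumed values for illustration only; the actual constants are defined in
// GPUHeuristics.cpp and may differ.
constexpr int64_t kVerySkinnyDimThreshold = 4;
constexpr int64_t preferredSubgroupSize = 64;

// Mirrors the check kept above: a matmul with one very skinny dimension and
// one large dimension is not worth targeting with an MFMA intrinsic.
static bool rejectedAsSkinny(int64_t mSize, int64_t nSize) {
  return (mSize <= kVerySkinnyDimThreshold && nSize > preferredSubgroupSize) ||
         (nSize <= kVerySkinnyDimThreshold && mSize > preferredSubgroupSize);
}

int main() {
  std::printf("M=2, N=577 rejected: %d\n", rejectedAsSkinny(2, 577));  // prints 1
  std::printf("M=8, N=577 rejected: %d\n", rejectedAsSkinny(8, 577));  // prints 0
  return 0;
}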

compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir

Lines changed: 5 additions & 5 deletions
@@ -293,12 +293,12 @@ module {
 // -----
 
 module {
-  func.func @unaligned_to_intrinsic_batched_matmul(%lhs : tensor<12x2x577xf32>, %rhs : tensor<12x577x577xf32>) -> tensor<12x2x577xf32> {
+  func.func @unaligned_to_intrinsic_batched_matmul(%lhs : tensor<12x8x577xf32>, %rhs : tensor<12x577x577xf32>) -> tensor<12x8x577xf32> {
     %c0 = arith.constant 0.0 : f32
-    %empty = tensor.empty() : tensor<12x2x577xf32>
-    %fill = linalg.fill ins(%c0 : f32) outs(%empty : tensor<12x2x577xf32>) -> tensor<12x2x577xf32>
-    %mm = linalg.batch_matmul ins(%lhs, %rhs : tensor<12x2x577xf32>, tensor<12x577x577xf32>) outs(%fill : tensor<12x2x577xf32>) -> tensor<12x2x577xf32>
-    return %mm : tensor<12x2x577xf32>
+    %empty = tensor.empty() : tensor<12x8x577xf32>
+    %fill = linalg.fill ins(%c0 : f32) outs(%empty : tensor<12x8x577xf32>) -> tensor<12x8x577xf32>
+    %mm = linalg.batch_matmul ins(%lhs, %rhs : tensor<12x8x577xf32>, tensor<12x577x577xf32>) outs(%fill : tensor<12x8x577xf32>) -> tensor<12x8x577xf32>
+    return %mm : tensor<12x8x577xf32>
   }
 }
 
compiler/src/iree/compiler/Codegen/LLVMGPU/test/config_matvec.mlir

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ func.func @static_batch_matvec() {
 
 
 // CHECK: LLVMGPUVectorDistribute
-// CDNA3: LLVMGPUTileAndFuse
+// CDNA3: LLVMGPUVectorDistribute
 
 // -----
 