Skip to content

Commit 7ed6488

Browse files
Groverkss authored and hhkit committed
Revert "[VectorDistribute] Correctly find new dimensions during reduction config" (iree-org#21810)
Reverts iree-org#21797. This patch does some weird things with elementwise consumers, tiling them serially when they should be tiled by consumer fusion. This needs more work before it can be landed. Signed-off-by: Ivan Ho <[email protected]>
1 parent 195edef commit 7ed6488

File tree

3 files changed

+36
-15
lines changed

3 files changed

+36
-15
lines changed

compiler/src/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,6 @@ getVectorDistributeReductionConfig(
492492
return loweringConfig;
493493
}
494494

495-
// TODO: Use IndexingMapInterface here instead of linalg::LinalgOp.
496495
static LogicalResult
497496
populateConfigInfo(const llvm::SetVector<linalg::LinalgOp> &computeOps,
498497
IREE::GPU::TargetAttr target, int64_t workgroupSize,
@@ -524,28 +523,42 @@ populateConfigInfo(const llvm::SetVector<linalg::LinalgOp> &computeOps,
524523
// LinalgOp with only parallel dims. This is needed if the op cannot be fused
525524
// with a reduction or introduces new loop dimensions.
526525
auto shouldAttachLoweringConfig = [&](linalg::LinalgOp linalgOp) -> bool {
527-
// We want to attach a lowering config to this operation if it introduces
528-
// a new dimension, when going by topological order in the backward slice.
529-
// The only two ways to introduce a new dimension are:
530-
//
531-
// 1. We have a reduction dimension.
532-
if (hasReductionIterator(linalgOp)) {
533-
return true;
534-
}
535-
// 2. There is no consumer which is a compute op (i.e., it already
536-
// has some way of getting fused).
537-
if (llvm::none_of(linalgOp->getUsers(), [&](Operation *user) {
526+
// If the operation has a gather, we want to fuse it with the
527+
// reduction.
528+
if (hasExternalCapture(cast<linalg::GenericOp>(linalgOp))) {
529+
return false;
530+
}
531+
// If some of the users are in computeOps and some are outside of
532+
// computeOps; attach lowering config, since the op can't be fused.
533+
if (llvm::any_of(linalgOp->getUsers(),
534+
[&](Operation *user) {
535+
auto linalgUser = dyn_cast<linalg::LinalgOp>(user);
536+
return linalgUser && computeOps.contains(linalgUser);
537+
}) &&
538+
llvm::any_of(linalgOp->getUsers(), [&](Operation *user) {
538539
auto linalgUser = dyn_cast<linalg::LinalgOp>(user);
539-
return linalgUser && computeOps.contains(linalgUser);
540+
return !linalgUser;
540541
})) {
541542
return true;
542543
}
543544

545+
// If the indexing map introduces new dimensions (more inputs than results),
546+
// attach a lowering config.
547+
for (OpOperand *operand : linalgOp.getDpsInputOperands()) {
548+
int64_t operandIdx = linalgOp.getIndexingMapIndex(operand);
549+
AffineMap indexingMap = linalgOp.getIndexingMapsArray()[operandIdx];
550+
if (indexingMap.getNumResults() > 0 &&
551+
indexingMap.getNumInputs() > indexingMap.getNumResults()) {
552+
return true;
553+
}
554+
}
555+
544556
return false;
545557
};
546558

547559
for (linalg::LinalgOp linalgOp : computeOps) {
548-
if (shouldAttachLoweringConfig(linalgOp)) {
560+
if (hasReductionIterator(linalgOp) ||
561+
shouldAttachLoweringConfig(linalgOp)) {
549562
auto loweringConfig = getVectorDistributeReductionConfig(
550563
linalgOp, target, sharedWgpTiles, workgroupSize, subgroupSize,
551564
threadLoads);

compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -755,6 +755,9 @@ void addGPUVectorDistributePassPipeline(OpPassManager &funcPassManager,
755755
/*convertToDpsOptions=*/std::nullopt,
756756
/*reorderStrategy=*/reorderStrategy);
757757

758+
// Some of the elementwise fusion can benefit from this pass.
759+
funcPassManager.addPass(createRematerializeParallelOpsPass());
760+
758761
funcPassManager.addPass(
759762
IREE::LinalgExt::createConvertAttentionToOnlineAttentionPass());
760763

compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_vector_distribute_reduction_gfx942.mlir

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,12 @@ func.func @test_multiple_stores(%arg0: !iree_tensor_ext.dispatch.tensor<readonly
245245
// CHECK: func.func @test_multiple_stores
246246
// CHECK-SAME: translation_info = #[[$TRANSLATION]]
247247
// CHECK: linalg.generic
248-
// CHECK-NOT: lowering_config
248+
// CHECK-SAME: attrs = {lowering_config = #iree_gpu.lowering_config<{
249+
// CHECK-SAME: lane_basis = {{\[}}[1, 64], [0, 1]],
250+
// CHECK-SAME: reduction = [0, 4096],
251+
// CHECK-SAME: subgroup_basis = {{\[}}[1, 16], [0, 1]],
252+
// CHECK-SAME: thread = [0, 4],
253+
// CHECK-SAME: workgroup = [1, 0]
249254
// CHECK: linalg.generic
250255
// CHECK-SAME: attrs = {lowering_config = #iree_gpu.lowering_config<{
251256
// CHECK-SAME: lane_basis = {{\[}}[1, 64], [0, 1]],

0 commit comments

Comments
 (0)