Skip to content

Commit 1d17bcd

Browse files
amd-eochoalo and MaheshRavishankar
authored and committed
Revert "[MLIR] Improve in-place folding to iterate until fixed-point (llvm#160615)"
This reverts commit fcf79e5.
1 parent 893b1d4 commit 1d17bcd

File tree

9 files changed

+42
-77
lines changed

9 files changed

+42
-77
lines changed

mlir/include/mlir/Transforms/FoldUtils.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@ class OperationFolder {
4040
/// deduplicated constants. If successful, replaces `op`'s uses with
4141
/// folded results, and returns success. If the op was completely folded it is
4242
/// erased. If it is just updated in place, `inPlaceUpdate` is set to true.
43-
/// On success() and when in-place, the folder is invoked until
44-
/// `maxIterations` is reached (default INT_MAX).
45-
LogicalResult tryToFold(Operation *op, bool *inPlaceUpdate = nullptr,
46-
int maxIterations = INT_MAX);
43+
LogicalResult tryToFold(Operation *op, bool *inPlaceUpdate = nullptr);
4744

4845
/// Tries to fold a pre-existing constant operation. `constValue` represents
4946
/// the value of the constant, and can be optionally passed if the value is
@@ -85,10 +82,7 @@ class OperationFolder {
8582

8683
/// Tries to perform folding on the given `op`. If successful, populates
8784
/// `results` with the results of the folding.
88-
/// On success() and when in-place, the folder is invoked until
89-
/// `maxIterations` is reached (default INT_MAX).
90-
LogicalResult tryToFold(Operation *op, SmallVectorImpl<Value> &results,
91-
int maxIterations = INT_MAX);
85+
LogicalResult tryToFold(Operation *op, SmallVectorImpl<Value> &results);
9286

9387
/// Try to process a set of fold results. Populates `results` on success,
9488
/// otherwise leaves it unchanged.

mlir/lib/IR/Builders.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include "mlir/IR/IRMapping.h"
1515
#include "mlir/IR/Matchers.h"
1616
#include "llvm/ADT/SmallVectorExtras.h"
17-
#include "llvm/Support/DebugLog.h"
1817

1918
using namespace mlir;
2019

@@ -487,18 +486,9 @@ OpBuilder::tryFold(Operation *op, SmallVectorImpl<Value> &results,
487486

488487
// Try to fold the operation.
489488
SmallVector<OpFoldResult, 4> foldResults;
490-
LDBG() << "Trying to fold: "
491-
<< OpWithFlags(op, OpPrintingFlags().skipRegions());
492489
if (failed(op->fold(foldResults)))
493490
return cleanupFailure();
494491

495-
int count = 0;
496-
do {
497-
LDBG() << "Folded in place #" << count
498-
<< " times: " << OpWithFlags(op, OpPrintingFlags().skipRegions());
499-
count++;
500-
} while (foldResults.empty() && succeeded(op->fold(foldResults)));
501-
502492
// An in-place fold does not require generation of any constants.
503493
if (foldResults.empty())
504494
return success();

mlir/lib/Transforms/Utils/FoldUtils.cpp

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "mlir/IR/Builders.h"
1717
#include "mlir/IR/Matchers.h"
1818
#include "mlir/IR/Operation.h"
19-
#include "llvm/Support/DebugLog.h"
2019

2120
using namespace mlir;
2221

@@ -68,8 +67,7 @@ static Operation *materializeConstant(Dialect *dialect, OpBuilder &builder,
6867
// OperationFolder
6968
//===----------------------------------------------------------------------===//
7069

71-
LogicalResult OperationFolder::tryToFold(Operation *op, bool *inPlaceUpdate,
72-
int maxIterations) {
70+
LogicalResult OperationFolder::tryToFold(Operation *op, bool *inPlaceUpdate) {
7371
if (inPlaceUpdate)
7472
*inPlaceUpdate = false;
7573

@@ -88,7 +86,7 @@ LogicalResult OperationFolder::tryToFold(Operation *op, bool *inPlaceUpdate,
8886

8987
// Try to fold the operation.
9088
SmallVector<Value, 8> results;
91-
if (failed(tryToFold(op, results, maxIterations)))
89+
if (failed(tryToFold(op, results)))
9290
return failure();
9391

9492
// Check to see if the operation was just updated in place.
@@ -226,19 +224,10 @@ bool OperationFolder::isFolderOwnedConstant(Operation *op) const {
226224
/// Tries to perform folding on the given `op`. If successful, populates
227225
/// `results` with the results of the folding.
228226
LogicalResult OperationFolder::tryToFold(Operation *op,
229-
SmallVectorImpl<Value> &results,
230-
int maxIterations) {
227+
SmallVectorImpl<Value> &results) {
231228
SmallVector<OpFoldResult, 8> foldResults;
232-
if (failed(op->fold(foldResults)))
233-
return failure();
234-
int count = 1;
235-
do {
236-
LDBG() << "Folded in place #" << count
237-
<< " times: " << OpWithFlags(op, OpPrintingFlags().skipRegions());
238-
} while (count++ < maxIterations && foldResults.empty() &&
239-
succeeded(op->fold(foldResults)));
240-
241-
if (failed(processFoldResults(op, results, foldResults)))
229+
if (failed(op->fold(foldResults)) ||
230+
failed(processFoldResults(op, results, foldResults)))
242231
return failure();
243232
return success();
244233
}

mlir/test/Dialect/Arith/constant-fold.mlir

Lines changed: 0 additions & 18 deletions
This file was deleted.

mlir/test/Dialect/XeGPU/xegpu-attr-interface.mlir

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ gpu.module @test {
77
//CHECK: [[IDY:%.+]] = affine.apply #map()[[[sgId]]]
88
//CHECK: [[c32:%.+]] = arith.constant 32 : index
99
//CHECK: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
10+
//CHECK: [[c0:%.+]] = arith.constant 0 : index
11+
//CHECK: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
1012
//CHECK: [[c128:%.+]] = arith.constant 128 : index
11-
//CHECK: [[MODY:%.+]] = index.remu [[LOCALY]], [[c128]]
13+
//CHECK: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
1214
//CHECK: [[BASE:%.+]] = vector.step : vector<32xindex>
1315
//CHECK: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
1416
//CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>
@@ -21,8 +23,10 @@ gpu.module @test {
2123
//CHECK: [[IDY:%.+]] = affine.apply #map()[[[sgId]]]
2224
//CHECK: [[c32:%.+]] = arith.constant 32 : index
2325
//CHECK: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
26+
//CHECK: [[c0:%.+]] = arith.constant 0 : index
27+
//CHECK: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
2428
//CHECK: [[c128:%.+]] = arith.constant 128 : index
25-
//CHECK: [[MODY:%.+]] = index.remu [[LOCALY]], [[c128]]
29+
//CHECK: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
2630
//CHECK: [[BASE:%.+]] = vector.step : vector<32xindex>
2731
//CHECK: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
2832
//CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-rr.mlir

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ gpu.module @test_round_robin_assignment {
2727
//CHECK: [[LX:%.+]] = index.mul [[IdX]], [[C64]]
2828
//CHECK: [[C0:%.+]] = arith.constant 0 : index
2929
//CHECK: [[C0_1:%.+]] = arith.constant 0 : index
30+
//CHECK: [[ADDY:%.+]] = arith.addi [[LY]], [[C0]] : index
31+
//CHECK: [[ADDX:%.+]] = arith.addi [[LX]], [[C0_1]] : index
3032
//CHECK: [[C128:%.+]] = arith.constant 128 : index
31-
//CHECK: [[offY:%.+]] = index.remu [[LY]], [[C128]]
33+
//CHECK: [[offY:%.+]] = index.remu [[ADDY]], [[C128]]
3234
//CHECK: [[C64_2:%.+]] = arith.constant 64 : index
33-
//CHECK: [[offX:%.+]] = index.remu [[LX]], [[C64_2]]
35+
//CHECK: [[offX:%.+]] = index.remu [[ADDX]], [[C64_2]]
3436
//CHECK: xegpu.create_nd_tdesc [[ARG_0]][[[offY]], [[offX]]] : memref<256x128xf32> -> !xegpu.tensor_desc<16x64xf32>
3537
%tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
3638
-> !xegpu.tensor_desc<128x64xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [16, 64]>>

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,12 @@ gpu.module @test_distribution {
330330
//CHECK: [[l_off_x:%.+]] = index.mul [[id_x]], [[c32_1]]
331331
//CHECK: [[c0:%.+]] = arith.constant 0 : index
332332
//CHECK: [[c0_1:%.+]] = arith.constant 0 : index
333+
//CHECK: [[l_off_y_0:%.+]] = arith.addi [[l_off_y]], [[c0]] : index
334+
//CHECK: [[l_off_x_0:%.+]] = arith.addi [[l_off_x]], [[c0_1]] : index
333335
//CHECK: [[c64:%.+]] = arith.constant 64 : index
334-
//CHECK: [[off_y:%.+]] = index.remu [[l_off_y]], [[c64]]
336+
//CHECK: [[off_y:%.+]] = index.remu [[l_off_y_0]], [[c64]]
335337
//CHECK: [[c128:%.+]] = arith.constant 128 : index
336-
//CHECK: [[off_x:%.+]] = index.remu [[l_off_x]], [[c128]]
338+
//CHECK: [[off_x:%.+]] = index.remu [[l_off_x_0]], [[c128]]
337339
//CHECK: xegpu.load_matrix [[mdesc]][[[off_y]], [[off_x]]] <{layout = #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>}>: !xegpu.mem_desc<64x128xf32>, index, index -> vector<32x32xf32>
338340
%0 = xegpu.create_mem_desc %arg0 : memref<32768xi8, 3> -> !xegpu.mem_desc<64x128xf32>
339341
%1 = xegpu.load_matrix %0[0, 0] <{layout = #xegpu.layout<sg_layout = [2, 4], sg_data = [32, 32], lane_layout = [2, 8], lane_data = [1, 1]>}>: !xegpu.mem_desc<64x128xf32> -> vector<64x128xf32>
@@ -352,11 +354,13 @@ gpu.module @test_distribution {
352354
//CHECK: [[id_y:%.+]] = affine.apply #map()[[[sgid]]]
353355
//CHECK: [[id_x:%.+]] = affine.apply #map1()[[[sgid]]]
354356
//CHECK: [[c32:%.+]] = arith.constant 32 : index
355-
//CHECK: [[l_off_y:%.+]] = index.mul [[id_y]], [[c32]]
357+
//CHECK: [[l_off_y_0:%.+]] = index.mul [[id_y]], [[c32]]
356358
//CHECK: [[c32_1:%.+]] = arith.constant 32 : index
357-
//CHECK: [[l_off_x:%.+]] = index.mul [[id_x]], [[c32_1]]
359+
//CHECK: [[l_off_x_0:%.+]] = index.mul [[id_x]], [[c32_1]]
358360
//CHECK: [[c0:%.+]] = arith.constant 0 : index
359361
//CHECK: [[c0_2:%.+]] = arith.constant 0 : index
362+
//CHECK: [[l_off_y:%.+]] = arith.addi [[l_off_y_0]], [[c0]] : index
363+
//CHECK: [[l_off_x:%.+]] = arith.addi [[l_off_x_0]], [[c0_2]] : index
360364
//CHECK: [[c64:%.+]] = arith.constant 64 : index
361365
//CHECK: [[off_y:%.+]] = index.remu [[l_off_y]], [[c64]]
362366
//CHECK: [[c128:%.+]] = arith.constant 128 : index
@@ -413,10 +417,11 @@ gpu.module @test_distribution {
413417
//CHECK: [[sgId:%.+]] = gpu.subgroup_id : index
414418
//CHECK-DAG: [[IDY:%.+]] = affine.apply #map2()[[[sgId]]]
415419
//CHECK-DAG: [[c32:%.+]] = arith.constant 32 : index
416-
//CHECK-DAG: [[LY:%.+]] = index.mul [[IDY]], [[c32]]
420+
//CHECK-DAG: [[LOCALY:%.+]] = index.mul [[IDY]], [[c32]]
417421
//CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index
422+
//CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
418423
//CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index
419-
//CHECK-DAG: [[MODY:%.+]] = index.remu [[LY]], [[c128]]
424+
//CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
420425
//CHECK-DAG: [[BASE:%.+]] = vector.step : vector<32xindex>
421426
//CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<32xindex>
422427
//CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<32xindex>
@@ -430,8 +435,9 @@ gpu.module @test_distribution {
430435
//CHECK-DAG: [[c8:%.+]] = arith.constant 8 : index
431436
//CHECK-DAG: [[LOCALY:%.+]] = index.mul [[sgId]], [[c8]]
432437
//CHECK-DAG: [[c0:%.+]] = arith.constant 0 : index
438+
//CHECK-DAG: [[Y:%.+]] = arith.addi [[LOCALY]], [[c0]] : index
433439
//CHECK-DAG: [[c128:%.+]] = arith.constant 128 : index
434-
//CHECK-DAG: [[MODY:%.+]] = index.remu [[LOCALY]], [[c128]]
440+
//CHECK-DAG: [[MODY:%.+]] = index.remu [[Y]], [[c128]]
435441
//CHECK-DAG: [[BASE:%.+]] = vector.step : vector<8xindex>
436442
//CHECK-DAG: [[CAST:%.+]] = vector.broadcast [[MODY]] : index to vector<8xindex>
437443
//CHECK: [[ADD:%.+]] = arith.addi [[BASE]], [[CAST]] : vector<8xindex>

mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ gpu.module @test_1_1_assignment {
1414
//CHECK: [[LX:%.+]] = index.mul [[SGIDX]], [[C32]]
1515
//CHECK: [[C0:%.+]] = arith.constant 0 : index
1616
//CHECK: [[C0_1:%.+]] = arith.constant 0 : index
17+
//CHECK: [[UY:%.+]] = arith.addi [[LY]], [[C0]] : index
18+
//CHECK: [[UX:%.+]] = arith.addi [[LX]], [[C0_1]] : index
1719
//CHECK: [[C256:%.+]] = arith.constant 256 : index
18-
//CHECK: [[Y:%.+]] = index.remu [[LY]], [[C256]]
20+
//CHECK: [[Y:%.+]] = index.remu [[UY]], [[C256]]
1921
//CHECK: [[C128:%.+]] = arith.constant 128 : index
20-
//CHECK: [[X:%.+]] = index.remu [[LX]], [[C128]]
22+
//CHECK: [[X:%.+]] = index.remu [[UX]], [[C128]]
2123
//CHECK: [[TDESC:%.+]] = xegpu.create_nd_tdesc [[ARG_0]][[[Y]], [[X]]] : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
2224
%tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<256x128xf32>
2325
-> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
@@ -35,13 +37,17 @@ gpu.module @test_1_1_assignment {
3537
//CHECK: [[LX:%.+]] = index.mul [[SGIDX]], [[C32]]
3638
//CHECK: [[C0:%.+]] = arith.constant 0 : index
3739
//CHECK: [[C0_2:%.+]] = arith.constant 0 : index
40+
//CHECK: [[UY:%.+]] = arith.addi [[LY]], [[C0]] : index
41+
//CHECK: [[UX:%.+]] = arith.addi [[LX]], [[C0_2]] : index
3842
//CHECK: [[C256:%.+]] = arith.constant 256 : index
39-
//CHECK: [[MODY:%.+]] = index.remu [[LY]], [[C256]]
43+
//CHECK: [[MODY:%.+]] = index.remu [[UY]], [[C256]]
4044
//CHECK: [[C128:%.+]] = arith.constant 128 : index
41-
//CHECK: [[MODX:%.+]] = index.remu [[LX]], [[C128]]
45+
//CHECK: [[MODX:%.+]] = index.remu [[UX]], [[C128]]
4246
//CHECK: [[C0_3:%.+]] = arith.constant 0 : index
47+
//CHECK: [[Y:%.+]] = index.add [[MODY]], [[C0_3]]
4348
//CHECK: [[C0_4:%.+]] = arith.constant 0 : index
44-
//CHECK: [[TDESC:%.+]] = xegpu.create_nd_tdesc [[ARG_0]][1, [[MODY]], [[MODX]]] : memref<3x256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
49+
//CHECK: [[X:%.+]] = index.add [[MODX]], [[C0_4]]
50+
//CHECK: [[TDESC:%.+]] = xegpu.create_nd_tdesc [[ARG_0]][1, [[Y]], [[X]]] : memref<3x256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
4551
%tdesc = xegpu.create_nd_tdesc %src[1, 0, 0] : memref<3x256x128xf32>
4652
-> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
4753
gpu.return

mlir/test/lib/Transforms/TestSingleFold.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@ struct TestSingleFold : public PassWrapper<TestSingleFold, OperationPass<>>,
2626
public RewriterBase::Listener {
2727
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSingleFold)
2828

29-
TestSingleFold() = default;
30-
TestSingleFold(const TestSingleFold &pass) : PassWrapper(pass) {}
31-
3229
StringRef getArgument() const final { return "test-single-fold"; }
3330
StringRef getDescription() const final {
3431
return "Test single-pass operation folding and dead constant elimination";
@@ -48,18 +45,13 @@ struct TestSingleFold : public PassWrapper<TestSingleFold, OperationPass<>>,
4845
if (it != existingConstants.end())
4946
existingConstants.erase(it);
5047
}
51-
52-
Option<int> maxIterations{*this, "max-iterations",
53-
llvm::cl::desc("Max iterations in the tryToFold"),
54-
llvm::cl::init(1)};
5548
};
5649
} // namespace
5750

5851
void TestSingleFold::foldOperation(Operation *op, OperationFolder &helper) {
5952
// Attempt to fold the specified operation, including handling unused or
6053
// duplicated constants.
61-
bool inPlaceUpdate = false;
62-
(void)helper.tryToFold(op, &inPlaceUpdate, maxIterations);
54+
(void)helper.tryToFold(op);
6355
}
6456

6557
void TestSingleFold::runOnOperation() {

0 commit comments

Comments (0)