Skip to content

Commit f1e443f

Browse files
timshen91memfrob
authored andcommitted
[mlir] Add a simplifying wrapper for generateCopy and expose it.
Summary: affineDataCopyGenerate is a monolithic function that combines several steps for good reasons, but that makes customizing its behavior even harder. The two major steps performed by affineDataCopyGenerate are: a) Identify interesting memrefs and collect their uses. b) Create new buffers to forward these uses. Step (a) actually requires tremendous customization options. One can see that from the recently added filterMemRef parameter. This patch adds a function that only does (b), in the hope that (a) can be directly implemented by the callers. In fact, (a) is quite simple if the caller has only one buffer to consider, or even one use. Differential Revision: https://reviews.llvm.org/D75965
1 parent 1f98c55 commit f1e443f

File tree

4 files changed

+84
-7
lines changed

4 files changed

+84
-7
lines changed

mlir/include/mlir/Transforms/LoopUtils.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class AffineForOp;
2424
class FuncOp;
2525
class OpBuilder;
2626
class Value;
27+
struct MemRefRegion;
2728

2829
namespace loop {
2930
class ForOp;
@@ -185,6 +186,34 @@ uint64_t affineDataCopyGenerate(AffineForOp forOp,
185186
Optional<Value> filterMemRef,
186187
DenseSet<Operation *> &copyNests);
187188

189+
/// Result for calling generateCopyForMemRegion.
190+
struct CopyGenerateResult {
191+
// Number of bytes used by alloc.
192+
uint64_t sizeInBytes;
193+
194+
// The newly created buffer allocation.
195+
Operation *alloc;
196+
197+
// Generated loop nest for copying data between the allocated buffer and the
198+
// original memref.
199+
Operation *copyNest;
200+
};
201+
202+
/// generateCopyForMemRegion is similar to affineDataCopyGenerate, but works
203+
/// with a single memref region. `memrefRegion` is supposed to contain analysis
204+
/// information within analyzedOp. The generated prologue and epilogue always
205+
/// surround `analyzedOp`.
206+
///
207+
/// Note that `analyzedOp` is a single op for API convenience, and the
208+
/// [begin, end) version can be added as needed.
209+
///
210+
/// Also note that certain options in `copyOptions` aren't looked at anymore,
211+
/// like slowMemorySpace.
212+
LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion,
213+
Operation *analyzedOp,
214+
const AffineCopyOptions &copyOptions,
215+
CopyGenerateResult &result);
216+
188217
/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
189218
/// parametric tile sizes that the outer loops have a fixed number of iterations
190219
/// as defined in `sizes`.

mlir/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,6 +1797,28 @@ uint64_t mlir::affineDataCopyGenerate(AffineForOp forOp,
17971797
filterMemRef, copyNests);
17981798
}
17991799

1800+
/// Generates the fast buffer and the copy code for the single region
/// `memrefRegion`, placing the generated code around `analyzedOp`.
///
/// On success, `result` holds the size in bytes reported by generateCopy, the
/// operation defining the fast buffer, and the copy nest (null if none was
/// generated). Returns failure if generateCopy fails or if no fast buffer was
/// recorded for `memrefRegion.memref`.
LogicalResult mlir::generateCopyForMemRegion(
    const MemRefRegion &memrefRegion, Operation *analyzedOp,
    const AffineCopyOptions &copyOptions, CopyGenerateResult &result) {
  // The range handed to generateCopy is exactly [analyzedOp, next(analyzedOp)).
  Block *block = analyzedOp->getBlock();
  auto begin = analyzedOp->getIterator();
  auto end = std::next(begin);
  DenseMap<Value, Value> fastBufferMap;
  DenseSet<Operation *> copyNests;

  auto err = generateCopy(memrefRegion, block, begin, end, block, begin, end,
                          copyOptions, fastBufferMap, copyNests,
                          &result.sizeInBytes, &begin, &end);
  if (failed(err))
    return err;

  // NOTE(review): generateCopy is not visible here. If it can report success
  // without registering a fast buffer for this memref (e.g. nothing to copy),
  // dereferencing the end iterator would be undefined behavior, so a missing
  // entry is reported as a failure instead.
  auto fastBufferIt = fastBufferMap.find(memrefRegion.memref);
  if (fastBufferIt == fastBufferMap.end())
    return failure();

  result.alloc = fastBufferIt->second.getDefiningOp();
  assert(copyNests.size() <= 1 && "At most one copy nest is expected.");
  result.copyNest = copyNests.empty() ? nullptr : *copyNests.begin();
  return success();
}
1821+
18001822
/// Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
18011823
static void
18021824
gatherLoopsInBlock(Block *block, unsigned currLoopDepth,

mlir/test/Transforms/affine-data-copy.mlir

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
// affine data copy utility on the input loop nest.
77
// '-test-affine-data-copy-memref-filter' passes the first memref found in an
88
// affine.load op in the innermost loop as a filter.
9-
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter=1' | FileCheck %s --check-prefix=FILTER
9+
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER
10+
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION
1011

1112
// -copy-skip-non-stride-loops forces the copies to be placed right inside the
1213
// tile space loops, avoiding the sensitivity of copy placement depth to memory
@@ -140,6 +141,7 @@ func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memref<40
140141
//
141142
// CHECK-SMALL-LABEL: func @foo
142143
// FILTER-LABEL: func @foo
144+
// MEMREF_REGION-LABEL: func @foo
143145
func @foo(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>, %arg2: memref<1024x1024xf32>) -> memref<1024x1024xf32> {
144146
affine.for %i = 0 to 1024 {
145147
affine.for %j = 0 to 1024 {
@@ -198,3 +200,15 @@ func @foo(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>, %arg2: mem
198200
// FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 {
199201
// FILTER: dealloc %{{.*}} : memref<1024x1024xf32>
200202
// FILTER-NOT: dealloc
203+
204+
// Check that only one memref is copied, because for-memref-region is enabled
205+
// (and the first ever encountered load is analyzed).
206+
// MEMREF_REGION: alloc() : memref<1024x1024xf32>
207+
// MEMREF_REGION-NOT: alloc()
208+
// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 {
209+
// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 {
210+
// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 {
211+
// MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 {
212+
// MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 {
213+
// MEMREF_REGION: dealloc %{{.*}} : memref<1024x1024xf32>
214+
// MEMREF_REGION-NOT: dealloc

mlir/test/lib/Transforms/TestAffineDataCopy.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "mlir/Analysis/Passes.h"
15+
#include "mlir/Analysis/Utils.h"
1516
#include "mlir/Dialect/AffineOps/AffineOps.h"
1617
#include "mlir/Pass/Pass.h"
1718
#include "mlir/Transforms/LoopUtils.h"
@@ -37,6 +38,10 @@ struct TestAffineDataCopy : public FunctionPass<TestAffineDataCopy> {
3738
llvm::cl::desc(
3839
"Enable memref filter testing in affine data copy optimization"),
3940
llvm::cl::init(false)};
41+
Option<bool> clTestGenerateCopyForMemRegion{
42+
*this, "for-memref-region",
43+
llvm::cl::desc("Test copy generation for a single memref region"),
44+
llvm::cl::init(false)};
4045
};
4146

4247
} // end anonymous namespace
@@ -55,13 +60,13 @@ void TestAffineDataCopy::runOnFunction() {
5560

5661
auto loopNest = depthToLoops[0][0];
5762
auto innermostLoop = depthToLoops[innermostLoopIdx][0];
58-
Optional<Value> memrefFilter;
59-
if (clMemRefFilter) {
63+
AffineLoadOp load;
64+
if (clMemRefFilter || clTestGenerateCopyForMemRegion) {
6065
// Gather MemRef filter. For simplicity, we use the first loaded memref
6166
// found in the innermost loop.
6267
for (auto &op : *innermostLoop.getBody()) {
63-
if (auto load = dyn_cast<AffineLoadOp>(op)) {
64-
memrefFilter = load.getMemRef();
68+
if (auto ld = dyn_cast<AffineLoadOp>(op)) {
69+
load = ld;
6570
break;
6671
}
6772
}
@@ -72,8 +77,15 @@ void TestAffineDataCopy::runOnFunction() {
7277
/*fastMemorySpace=*/0,
7378
/*tagMemorySpace=*/0,
7479
/*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
75-
DenseSet<Operation *> copyNests;
76-
affineDataCopyGenerate(loopNest, copyOptions, memrefFilter, copyNests);
80+
if (clMemRefFilter) {
81+
DenseSet<Operation *> copyNests;
82+
affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(), copyNests);
83+
} else if (clTestGenerateCopyForMemRegion) {
84+
CopyGenerateResult result;
85+
MemRefRegion region(loopNest.getLoc());
86+
region.compute(load, /*loopDepth=*/0);
87+
generateCopyForMemRegion(region, loopNest, copyOptions, result);
88+
}
7789
}
7890

7991
namespace mlir {

0 commit comments

Comments
 (0)