Commit f528df8

Revert "Add a test for UsedDeclVisitor"
This reverts commit b58f6bb.
1 parent: b58f6bb

2 files changed: +262, -26


clang/test/CodeGenCXX/used-decl-visitor.cpp

0 additions, 18 deletions. This file was deleted.

AffineDataCopyGeneration.cpp

262 additions, 8 deletions.

@@ -1,14 +1,268 @@
-struct A {
-  int a;
-};
+//===- AffineDataCopyGeneration.cpp - Explicit memref copying pass ------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to automatically promote accessed memref regions
+// to buffers in a faster memory space that is explicitly managed, with the
+// necessary data movement operations performed through either regular
+// point-wise load/store's or DMAs. Such explicit copying (also referred to as
+// array packing/unpacking in the literature), when done on arrays that exhibit
+// reuse, results in near elimination of conflict misses, TLB misses, reduced
+// use of hardware prefetch streams, and reduced false sharing. It is also
+// necessary for hardware that explicitly manages levels in the memory
+// hierarchy, and where DMAs may have to be used. This optimization is often
+// performed on already tiled code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/Utils.h"
+#include "mlir/Dialect/AffineOps/AffineOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/Utils.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "affine-data-copy-generate"
+
+using namespace mlir;
+
+static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
+
+static llvm::cl::opt<unsigned long long> clFastMemoryCapacity(
+    "affine-data-copy-generate-fast-mem-capacity",
+    llvm::cl::desc(
+        "Set fast memory space capacity in KiB (default: unlimited)"),
+    llvm::cl::cat(clOptionsCategory));
+
+static llvm::cl::opt<bool>
+    clDma("affine-data-copy-generate-dma",
+          llvm::cl::desc("Generate DMA instead of point-wise copy"),
+          llvm::cl::cat(clOptionsCategory), llvm::cl::init(true));
+
+static llvm::cl::opt<unsigned> clFastMemorySpace(
+    "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1),
+    llvm::cl::desc(
+        "Fast memory space identifier for copy generation (default: 1)"),
+    llvm::cl::cat(clOptionsCategory));
+
+static llvm::cl::opt<bool> clSkipNonUnitStrideLoop(
+    "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden,
+    llvm::cl::init(false),
+    llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths "
+                   "for copy placement"),
+    llvm::cl::cat(clOptionsCategory));
 
-static A a;
+namespace {
 
-struct B {
-  B(int b = a.a) {}
+/// Replaces all loads and stores on memref's living in 'slowMemorySpace' by
+/// introducing copy operations to transfer data into `fastMemorySpace` and
+/// rewriting the original load's/store's to instead load/store from the
+/// allocated fast memory buffers. Additional options specify the identifier
+/// corresponding to the fast memory space and the amount of fast memory space
+/// available. The pass traverses through the nesting structure, recursing to
+/// inner levels if necessary to determine at what depth copies need to be
+/// placed so that the allocated buffers fit within the memory capacity
+/// provided.
+// TODO(bondhugula): We currently can't generate copies correctly when stores
+// are strided. Check for strided stores.
+struct AffineDataCopyGeneration
+    : public FunctionPass<AffineDataCopyGeneration> {
+  explicit AffineDataCopyGeneration(
+      unsigned slowMemorySpace = 0,
+      unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0,
+      int minDmaTransferSize = 1024,
+      uint64_t fastMemCapacityBytes =
+          (clFastMemoryCapacity.getNumOccurrences() > 0
+               ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB
+               : std::numeric_limits<uint64_t>::max()),
+      bool generateDma = clDma,
+      bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop)
+      : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace),
+        tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize),
+        fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma),
+        skipNonUnitStrideLoops(skipNonUnitStrideLoops) {}
+
+  explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other)
+      : slowMemorySpace(other.slowMemorySpace),
+        fastMemorySpace(other.fastMemorySpace),
+        tagMemorySpace(other.tagMemorySpace),
+        minDmaTransferSize(other.minDmaTransferSize),
+        fastMemCapacityBytes(other.fastMemCapacityBytes),
+        generateDma(other.generateDma),
+        skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {}
+
+  void runOnFunction() override;
+  LogicalResult runOnBlock(Block *block, DenseSet<Operation *> &copyNests);
+
+  // Slow memory space associated with copies.
+  const unsigned slowMemorySpace;
+  // Fast memory space associated with copies.
+  unsigned fastMemorySpace;
+  // Memory space associated with DMA tags.
+  unsigned tagMemorySpace;
+  // Minimum DMA transfer size supported by the target in bytes.
+  const int minDmaTransferSize;
+  // Capacity of the faster memory space.
+  uint64_t fastMemCapacityBytes;
+
+  // If set, generate DMA operations instead of read/write.
+  bool generateDma;
+
+  // If set, ignore loops with steps other than 1.
+  bool skipNonUnitStrideLoops;
+
+  // Constant zero index to avoid too many duplicates.
+  Value zeroIndex = nullptr;
 };
 
+} // end anonymous namespace
 
-void foo() {
-  B();
+/// Generates copies for memref's living in 'slowMemorySpace' into newly created
+/// buffers in 'fastMemorySpace', and replaces memory operations to the former
+/// by the latter. Only load op's handled for now.
+/// TODO(bondhugula): extend this to store op's.
+std::unique_ptr<OpPassBase<FuncOp>> mlir::createAffineDataCopyGenerationPass(
+    unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace,
+    int minDmaTransferSize, uint64_t fastMemCapacityBytes) {
+  return std::make_unique<AffineDataCopyGeneration>(
+      slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
+      fastMemCapacityBytes);
 }
+
+/// Generate copies for this block. The block is partitioned into separate
+/// ranges: each range is either a sequence of one or more operations starting
+/// and ending with an affine load or store op, or just an affine.for op (which
+/// could have other affine.for op's nested within).
+LogicalResult
+AffineDataCopyGeneration::runOnBlock(Block *block,
+                                     DenseSet<Operation *> &copyNests) {
+  if (block->empty())
+    return success();
+
+  AffineCopyOptions copyOptions = {generateDma, slowMemorySpace,
+                                   fastMemorySpace, tagMemorySpace,
+                                   fastMemCapacityBytes};
+
+  // Every affine.for op in the block starts and ends a block range for copying;
+  // in addition, a contiguous sequence of operations starting with a
+  // load/store op but not including any copy nests themselves is also
+  // identified as a copy block range. Straightline code (a contiguous chunk of
+  // operations excluding AffineForOp's) are always assumed to not exhaust
+  // memory. As a result, this approach is conservative in some cases at the
+  // moment; we do a check later and report an error with location info.
+  // TODO(bondhugula): An 'affine.if' operation is being treated similar to an
+  // operation. 'affine.if''s could have 'affine.for's in them;
+  // treat them separately.
+
+  // Get to the first load, store, or for op (that is not a copy nest itself).
+  auto curBegin =
+      std::find_if(block->begin(), block->end(), [&](Operation &op) {
+        return (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
+                isa<AffineForOp>(op)) &&
+               copyNests.count(&op) == 0;
+      });
+
+  // Create [begin, end) ranges.
+  auto it = curBegin;
+  while (it != block->end()) {
+    AffineForOp forOp;
+    // If you hit a non-copy for loop, we will split there.
+    if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
+      // Perform the copying up until this 'for' op first.
+      affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions,
+                             /*filterMemRef=*/llvm::None, copyNests);
+
+      // Returns true if the footprint is known to exceed capacity.
+      auto exceedsCapacity = [&](AffineForOp forOp) {
+        Optional<int64_t> footprint =
+            getMemoryFootprintBytes(forOp,
+                                    /*memorySpace=*/0);
+        return (footprint.hasValue() &&
+                static_cast<uint64_t>(footprint.getValue()) >
+                    fastMemCapacityBytes);
+      };
+
+      // If the memory footprint of the 'affine.for' loop is higher than fast
+      // memory capacity (when provided), we recurse to copy at an inner level
+      // until we find a depth at which footprint fits in fast mem capacity. If
+      // the footprint can't be calculated, we assume for now it fits. Recurse
+      // inside if footprint for 'forOp' exceeds capacity, or when
+      // skipNonUnitStrideLoops is set and the step size is not one.
+      bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
+                                                 : exceedsCapacity(forOp);
+      if (recurseInner) {
+        // We'll recurse and do the copies at an inner level for 'forInst'.
+        // Recurse onto the body of this loop.
+        runOnBlock(forOp.getBody(), copyNests);
+      } else {
+        // We have enough capacity, i.e., copies will be computed for the
+        // portion of the block until 'it', and for 'it', which is 'forOp'. Note
+        // that for the latter, the copies are placed just before this loop (for
+        // incoming copies) and right after (for outgoing ones).
+
+        // Inner loop copies have their own scope - we don't thus update
+        // consumed capacity. The footprint check above guarantees this inner
+        // loop's footprint fits.
+        affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it), copyOptions,
+                               /*filterMemRef=*/llvm::None, copyNests);
+      }
+      // Get to the next load or store op after 'forOp'.
+      curBegin = std::find_if(std::next(it), block->end(), [&](Operation &op) {
+        return (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
+                isa<AffineForOp>(op)) &&
+               copyNests.count(&op) == 0;
+      });
+      it = curBegin;
+    } else {
+      assert(copyNests.count(&*it) == 0 &&
+             "all copy nests generated should have been skipped above");
+      // We simply include this op in the current range and continue for more.
+      ++it;
+    }
+  }
+
+  // Generate the copy for the final block range.
+  if (curBegin != block->end()) {
+    // Can't be a terminator because it would have been skipped above.
+    assert(!curBegin->isKnownTerminator() && "can't be a terminator");
+    // Exclude the affine terminator - hence, the std::prev.
+    affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/std::prev(block->end()),
+                           copyOptions, /*filterMemRef=*/llvm::None, copyNests);
+  }
+
+  return success();
+}
+
+void AffineDataCopyGeneration::runOnFunction() {
+  FuncOp f = getFunction();
+  OpBuilder topBuilder(f.getBody());
+  zeroIndex = topBuilder.create<ConstantIndexOp>(f.getLoc(), 0);
+
+  // Nests that are copy-in's or copy-out's; the root AffineForOps of those
+  // nests are stored herein.
+  DenseSet<Operation *> copyNests;
+
+  // Clear recorded copy nests.
+  copyNests.clear();
+
+  for (auto &block : f)
+    runOnBlock(&block, copyNests);
+
+  // Promote any single iteration loops in the copy nests.
+  for (auto nest : copyNests) {
+    nest->walk([](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
+  }
+}
+
+static PassRegistration<AffineDataCopyGeneration>
+    pass("affine-data-copy-generate",
+         "Generate explicit copying for memory operations");
