
Commit b58f6bb

Add a test for UsedDeclVisitor
This test is reduced from mlir/lib/Transforms/AffineDataCopyGeneration.cpp to make sure there is no assertion due to UsedDeclVisitor.
1 parent: 2ea5495

2 files changed (+26 lines, -262 lines)

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64 -emit-llvm -o %t %s
+
+// Make sure there is no assertion due to UsedDeclVisitor.
+
+struct A {
+  int a;
+};
+
+static A a;
+
+struct B {
+  B(int b = a.a) {}
+};
+
+
+void foo() {
+  B();
+}
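
The interesting line in this test is the constructor's default argument: B(int b = a.a) reads a member of the file-scope static variable a, and that expression is only evaluated at a call that actually uses the default, such as B() inside foo(). Emitting foo() therefore forces Clang to treat the otherwise-unreferenced static a (and A::a) as used through the default-argument expression, which is presumably where UsedDeclVisitor comes in and where the assertion used to fire. The same pattern, restated with explanatory comments (the comments are an interpretation of standard C++ semantics, not text from the commit):

struct A {
  int a;
};

static A a;           // internal-linkage variable, referenced only from a default argument

struct B {
  B(int b = a.a) {}   // the default-argument expression names the static 'a'
};

void foo() {
  B();                // omits 'b', so 'a.a' is evaluated and 'a' becomes used here
}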
Lines changed: 8 additions & 262 deletions
@@ -1,268 +1,14 @@
-//===- AffineDataCopyGeneration.cpp - Explicit memref copying pass ------*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass to automatically promote accessed memref regions
-// to buffers in a faster memory space that is explicitly managed, with the
-// necessary data movement operations performed through either regular
-// point-wise load/store's or DMAs. Such explicit copying (also referred to as
-// array packing/unpacking in the literature), when done on arrays that exhibit
-// reuse, results in near elimination of conflict misses, TLB misses, reduced
-// use of hardware prefetch streams, and reduced false sharing. It is also
-// necessary for hardware that explicitly managed levels in the memory
-// hierarchy, and where DMAs may have to be used. This optimization is often
-// performed on already tiled code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/AffineOps/AffineOps.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include <algorithm>
-
-#define DEBUG_TYPE "affine-data-copy-generate"
-
-using namespace mlir;
-
-static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options");
-
-static llvm::cl::opt<unsigned long long> clFastMemoryCapacity(
-    "affine-data-copy-generate-fast-mem-capacity",
-    llvm::cl::desc(
-        "Set fast memory space capacity in KiB (default: unlimited)"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool>
-    clDma("affine-data-copy-generate-dma",
-          llvm::cl::desc("Generate DMA instead of point-wise copy"),
-          llvm::cl::cat(clOptionsCategory), llvm::cl::init(true));
-
-static llvm::cl::opt<unsigned> clFastMemorySpace(
-    "affine-data-copy-generate-fast-mem-space", llvm::cl::init(1),
-    llvm::cl::desc(
-        "Fast memory space identifier for copy generation (default: 1)"),
-    llvm::cl::cat(clOptionsCategory));
-
-static llvm::cl::opt<bool> clSkipNonUnitStrideLoop(
-    "affine-data-copy-generate-skip-non-unit-stride-loops", llvm::cl::Hidden,
-    llvm::cl::init(false),
-    llvm::cl::desc("Testing purposes: avoid non-unit stride loop choice depths "
-                   "for copy placement"),
-    llvm::cl::cat(clOptionsCategory));
-
-namespace {
-
-/// Replaces all loads and stores on memref's living in 'slowMemorySpace' by
-/// introducing copy operations to transfer data into `fastMemorySpace` and
-/// rewriting the original load's/store's to instead load/store from the
-/// allocated fast memory buffers. Additional options specify the identifier
-/// corresponding to the fast memory space and the amount of fast memory space
-/// available. The pass traverses through the nesting structure, recursing to
-/// inner levels if necessary to determine at what depth copies need to be
-/// placed so that the allocated buffers fit within the memory capacity
-/// provided.
-// TODO(bondhugula): We currently can't generate copies correctly when stores
-// are strided. Check for strided stores.
-struct AffineDataCopyGeneration
-    : public FunctionPass<AffineDataCopyGeneration> {
-  explicit AffineDataCopyGeneration(
-      unsigned slowMemorySpace = 0,
-      unsigned fastMemorySpace = clFastMemorySpace, unsigned tagMemorySpace = 0,
-      int minDmaTransferSize = 1024,
-      uint64_t fastMemCapacityBytes =
-          (clFastMemoryCapacity.getNumOccurrences() > 0
-               ? clFastMemoryCapacity * 1024 // cl-provided size is in KiB
-               : std::numeric_limits<uint64_t>::max()),
-      bool generateDma = clDma,
-      bool skipNonUnitStrideLoops = clSkipNonUnitStrideLoop)
-      : slowMemorySpace(slowMemorySpace), fastMemorySpace(fastMemorySpace),
-        tagMemorySpace(tagMemorySpace), minDmaTransferSize(minDmaTransferSize),
-        fastMemCapacityBytes(fastMemCapacityBytes), generateDma(generateDma),
-        skipNonUnitStrideLoops(skipNonUnitStrideLoops) {}
-
-  explicit AffineDataCopyGeneration(const AffineDataCopyGeneration &other)
-      : slowMemorySpace(other.slowMemorySpace),
-        fastMemorySpace(other.fastMemorySpace),
-        tagMemorySpace(other.tagMemorySpace),
-        minDmaTransferSize(other.minDmaTransferSize),
-        fastMemCapacityBytes(other.fastMemCapacityBytes),
-        generateDma(other.generateDma),
-        skipNonUnitStrideLoops(other.skipNonUnitStrideLoops) {}
-
-  void runOnFunction() override;
-  LogicalResult runOnBlock(Block *block, DenseSet<Operation *> &copyNests);
-
-  // Slow memory space associated with copies.
-  const unsigned slowMemorySpace;
-  // Fast memory space associated with copies.
-  unsigned fastMemorySpace;
-  // Memory space associated with DMA tags.
-  unsigned tagMemorySpace;
-  // Minimum DMA transfer size supported by the target in bytes.
-  const int minDmaTransferSize;
-  // Capacity of the faster memory space.
-  uint64_t fastMemCapacityBytes;
-
-  // If set, generate DMA operations instead of read/write.
-  bool generateDma;
-
-  // If set, ignore loops with steps other than 1.
-  bool skipNonUnitStrideLoops;
-
-  // Constant zero index to avoid too many duplicates.
-  Value zeroIndex = nullptr;
+struct A {
+  int a;
 };
 
-} // end anonymous namespace
+static A a;
 
-/// Generates copies for memref's living in 'slowMemorySpace' into newly created
-/// buffers in 'fastMemorySpace', and replaces memory operations to the former
-/// by the latter. Only load op's handled for now.
-/// TODO(bondhugula): extend this to store op's.
-std::unique_ptr<OpPassBase<FuncOp>> mlir::createAffineDataCopyGenerationPass(
-    unsigned slowMemorySpace, unsigned fastMemorySpace, unsigned tagMemorySpace,
-    int minDmaTransferSize, uint64_t fastMemCapacityBytes) {
-  return std::make_unique<AffineDataCopyGeneration>(
-      slowMemorySpace, fastMemorySpace, tagMemorySpace, minDmaTransferSize,
-      fastMemCapacityBytes);
-}
-
-/// Generate copies for this block. The block is partitioned into separate
-/// ranges: each range is either a sequence of one or more operations starting
-/// and ending with an affine load or store op, or just an affine.for op (which
-/// could have other affine.for op's nested within).
-LogicalResult
-AffineDataCopyGeneration::runOnBlock(Block *block,
-                                     DenseSet<Operation *> &copyNests) {
-  if (block->empty())
-    return success();
-
-  AffineCopyOptions copyOptions = {generateDma, slowMemorySpace,
-                                   fastMemorySpace, tagMemorySpace,
-                                   fastMemCapacityBytes};
-
-  // Every affine.for op in the block starts and ends a block range for copying;
-  // in addition, a contiguous sequence of operations starting with a
-  // load/store op but not including any copy nests themselves is also
-  // identified as a copy block range. Straightline code (a contiguous chunk of
-  // operations excluding AffineForOp's) are always assumed to not exhaust
-  // memory. As a result, this approach is conservative in some cases at the
-  // moment; we do a check later and report an error with location info.
-  // TODO(bondhugula): An 'affine.if' operation is being treated similar to an
-  // operation. 'affine.if''s could have 'affine.for's in them;
-  // treat them separately.
-
-  // Get to the first load, store, or for op (that is not a copy nest itself).
-  auto curBegin =
-      std::find_if(block->begin(), block->end(), [&](Operation &op) {
-        return (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
-                isa<AffineForOp>(op)) &&
-               copyNests.count(&op) == 0;
-      });
-
-  // Create [begin, end) ranges.
-  auto it = curBegin;
-  while (it != block->end()) {
-    AffineForOp forOp;
-    // If you hit a non-copy for loop, we will split there.
-    if ((forOp = dyn_cast<AffineForOp>(&*it)) && copyNests.count(forOp) == 0) {
-      // Perform the copying up until this 'for' op first.
-      affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/it, copyOptions,
-                             /*filterMemRef=*/llvm::None, copyNests);
-
-      // Returns true if the footprint is known to exceed capacity.
-      auto exceedsCapacity = [&](AffineForOp forOp) {
-        Optional<int64_t> footprint =
-            getMemoryFootprintBytes(forOp,
-                                    /*memorySpace=*/0);
-        return (footprint.hasValue() &&
-                static_cast<uint64_t>(footprint.getValue()) >
-                    fastMemCapacityBytes);
-      };
-
-      // If the memory footprint of the 'affine.for' loop is higher than fast
-      // memory capacity (when provided), we recurse to copy at an inner level
-      // until we find a depth at which footprint fits in fast mem capacity. If
-      // the footprint can't be calculated, we assume for now it fits. Recurse
-      // inside if footprint for 'forOp' exceeds capacity, or when
-      // skipNonUnitStrideLoops is set and the step size is not one.
-      bool recurseInner = skipNonUnitStrideLoops ? forOp.getStep() != 1
-                                                 : exceedsCapacity(forOp);
-      if (recurseInner) {
-        // We'll recurse and do the copies at an inner level for 'forInst'.
-        // Recurse onto the body of this loop.
-        runOnBlock(forOp.getBody(), copyNests);
-      } else {
-        // We have enough capacity, i.e., copies will be computed for the
-        // portion of the block until 'it', and for 'it', which is 'forOp'. Note
-        // that for the latter, the copies are placed just before this loop (for
-        // incoming copies) and right after (for outgoing ones).
-
-        // Inner loop copies have their own scope - we don't thus update
-        // consumed capacity. The footprint check above guarantees this inner
-        // loop's footprint fits.
-        affineDataCopyGenerate(/*begin=*/it, /*end=*/std::next(it), copyOptions,
-                               /*filterMemRef=*/llvm::None, copyNests);
-      }
-      // Get to the next load or store op after 'forOp'.
-      curBegin = std::find_if(std::next(it), block->end(), [&](Operation &op) {
-        return (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op) ||
-                isa<AffineForOp>(op)) &&
-               copyNests.count(&op) == 0;
-      });
-      it = curBegin;
-    } else {
-      assert(copyNests.count(&*it) == 0 &&
-             "all copy nests generated should have been skipped above");
-      // We simply include this op in the current range and continue for more.
-      ++it;
-    }
-  }
-
-  // Generate the copy for the final block range.
-  if (curBegin != block->end()) {
-    // Can't be a terminator because it would have been skipped above.
-    assert(!curBegin->isKnownTerminator() && "can't be a terminator");
-    // Exclude the affine terminator - hence, the std::prev.
-    affineDataCopyGenerate(/*begin=*/curBegin, /*end=*/std::prev(block->end()),
-                           copyOptions, /*filterMemRef=*/llvm::None, copyNests);
-  }
-
-  return success();
-}
-
-void AffineDataCopyGeneration::runOnFunction() {
-  FuncOp f = getFunction();
-  OpBuilder topBuilder(f.getBody());
-  zeroIndex = topBuilder.create<ConstantIndexOp>(f.getLoc(), 0);
-
-  // Nests that are copy-in's or copy-out's; the root AffineForOps of those
-  // nests are stored herein.
-  DenseSet<Operation *> copyNests;
-
-  // Clear recorded copy nests.
-  copyNests.clear();
+struct B {
+  B(int b = a.a) {}
+};
 
-  for (auto &block : f)
-    runOnBlock(&block, copyNests);
 
-  // Promote any single iteration loops in the copy nests.
-  for (auto nest : copyNests) {
-    nest->walk([](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
-  }
+void foo() {
+  B();
 }
-
-static PassRegistration<AffineDataCopyGeneration>
-    pass("affine-data-copy-generate",
-         "Generate explicit copying for memory operations");
