Skip to content

Commit 1447eb5

Browse files
[Reprogram] Add a global flag + e2e test case for reprogramming Dmas (#1334)
-- This commit adds a global flag `--iree-amdaie-reprogram-dmas` to be used when we want to reprogram the DMAs. -- Also adds an e2e test case. -- This is in accordance with the [reprogramming DMA work](#1287) and is the final PR needed to start supporting it. Signed-off-by: Abhishek Varma <[email protected]> --------- Signed-off-by: Abhishek Varma <[email protected]>
1 parent 1760d70 commit 1447eb5

File tree

5 files changed

+71
-19
lines changed

5 files changed

+71
-19
lines changed

build_tools/ci/cpu_comparison/matmul_test_config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,20 @@
1616
"--iree-amdaie-num-cols=1",
1717
],
1818
},
19+
{
20+
"M": 32,
21+
"N": 32,
22+
"K": 32,
23+
"input_type": "i32",
24+
"acc_type": "i32",
25+
"name_suffix": "reprogram_dma",
26+
"additional_labels": ["ReprogramDmas"],
27+
"aie_compilation_flags": [
28+
"--iree-amdaie-num-rows=1",
29+
"--iree-amdaie-num-cols=1",
30+
"--iree-amdaie-reprogram-dmas",
31+
],
32+
},
1933
# 2x2 core tests.
2034
{
2135
"M": 32,

compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend {
241241
options.enableCoalescingLoops, options.enableCollapsingUnitDims,
242242
options.enableFunctionOutlining, options.callReplication,
243243
options.insertLoopAroundCoreBlock, options.enableCtrlPkt,
244-
options.coreStackSize);
244+
options.coreStackSize, options.reprogramDmas);
245245
}
246246

247247
void buildLinkingPassPipeline(OpPassManager &passManager) override {

compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ struct AMDAIEOptions {
6464
bool matmulElementwiseFusion{false};
6565
AMDAIEDevice AMDAIETargetDevice{AMDAIEDevice::npu1_4col};
6666

67+
// Enable/Disable reprogramming of DMAs.
68+
bool reprogramDmas{false};
69+
6770
// The number of rows for the compiler to target. '0' denotes 'all'.
6871
private:
6972
unsigned AMDAIENumRows{0};
@@ -257,6 +260,13 @@ struct AMDAIEOptions {
257260
"for matmul-elementwise fusion. It is currently added for "
258261
"development purpose and should be removed in the future."));
259262

263+
binder.opt<bool>(
264+
"iree-amdaie-reprogram-dmas", reprogramDmas, llvm::cl::cat(category),
265+
llvm::cl::desc(
266+
"Flag used to enable/disable reprogramming of DMAs. "
267+
"By default it'll be disabled, so we would have circular "
268+
"DMAs for L2/L1 cache."));
269+
260270
/// Command line option for selecting the target AIE device.
261271
binder.opt<AMDAIEDevice>(
262272
"iree-amdaie-target-device", AMDAIETargetDevice,

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ static void addAMDAIEBufferizePasses(OpPassManager &pm,
107107
}
108108

109109
void addAMDAIEToAIEPasses(OpPassManager &passManager,
110-
bool insertLoopAroundCoreBlock) {
110+
bool insertLoopAroundCoreBlock, bool reprogramDmas) {
111111
// The infinite loop insertion transformation needs to be called before the
112112
// `AcquireReleaseToUseLock` pass as the latter will perform loop unrolling
113113
// based on the objFifo depths.
@@ -123,9 +123,7 @@ void addAMDAIEToAIEPasses(OpPassManager &passManager,
123123
passManager.addPass(createAMDAIEAddNoAliasFunctionArgumentsPass());
124124
{
125125
AMDAIELowerToAIEOptions options;
126-
// TODO(avarma): In follow-up PRs this will be replaced by a global flag.
127-
// Currently setting as `false`.
128-
options.reprogramDmas = /*reprogramDmas=*/false;
126+
options.reprogramDmas = reprogramDmas;
129127
passManager.addPass(createAMDAIELowerToAIEPass(options));
130128
}
131129
passManager.addPass(createAMDAIERemoveMemorySpacePass());
@@ -672,7 +670,7 @@ void buildAMDAIETransformPassPipeline(
672670
PacketFlowStrategy packetFlowStrategy, bool enableCoalescingLoops,
673671
bool enableCollapsingUnitDims, OutliningStrategy enableFunctionOutlining,
674672
int callReplication, bool insertLoopAroundCoreBlock, bool enableCtrlPkt,
675-
uint32_t coreStackSize) {
673+
uint32_t coreStackSize, bool reprogramDmas) {
676674
OpPassManager &modulePassManager = variantPassManager.nest<ModuleOp>();
677675
{
678676
FunctionLikeNest funcPassManager(modulePassManager);
@@ -707,7 +705,8 @@ void buildAMDAIETransformPassPipeline(
707705
modulePassManager, packetFlowStrategy, useTilePipeline,
708706
enableVectorizationPasses, enableCoalescingLoops,
709707
enableCollapsingUnitDims, enableFunctionOutlining, callReplication,
710-
insertLoopAroundCoreBlock, numCols, enableCtrlPkt, coreStackSize);
708+
insertLoopAroundCoreBlock, numCols, enableCtrlPkt, coreStackSize,
709+
reprogramDmas);
711710
} else if (useLowerToAIEPipeline == LowerToAIEPassPipeline::AIR) {
712711
addMLIRAIRLoweringPasses(modulePassManager, device, useTilePipeline,
713712
matmulElementwiseFusion,
@@ -733,7 +732,7 @@ void addAMDAIEObjectFifoLoweringPasses(
733732
bool enableCoalescingLoops, bool enableCollapsingUnitDims,
734733
OutliningStrategy enableFunctionOutlining, int callReplication,
735734
bool insertLoopAroundCoreBlock, uint32_t numCols, bool enableCtrlPkt,
736-
uint32_t coreStackSize) {
735+
uint32_t coreStackSize, bool reprogramDmas) {
737736
passManager.addPass(createEraseHALDescriptorTypeFromMemRefPass());
738737
passManager.addPass(memref::createFoldMemRefAliasOpsPass());
739738

@@ -796,18 +795,28 @@ void addAMDAIEObjectFifoLoweringPasses(
796795

797796
passManager.addPass(createCSEPass());
798797
passManager.addPass(createCanonicalizerPass());
799-
passManager.addPass(createAMDAIEAssignLogicalObjectFifoDepthPass());
798+
{
799+
AMDAIEAssignLogicalObjectFifoDepthOptions options;
800+
// TODO(avarma): When reprogramming DMAs, we currently disable double
801+
// buffering. Relax this constraint later, after modifying the
802+
// controlcode-lowering and controlcode-to-transaction-binary passes to work
803+
// with double buffering.
804+
if (reprogramDmas) {
805+
options.l2BufferDepth = 1;
806+
options.l1BufferDepth = 1;
807+
}
808+
passManager.addPass(createAMDAIEAssignLogicalObjectFifoDepthPass(options));
809+
}
800810

801811
passManager.addPass(createAMDAIEAssignTilesPass());
802812
passManager.addPass(createCSEPass());
803813
passManager.addPass(createCanonicalizerPass());
804814

805-
passManager.addPass(createAMDAIEDmaToCircularDmaPass());
815+
if (!reprogramDmas) passManager.addPass(createAMDAIEDmaToCircularDmaPass());
816+
806817
{
807818
AMDAIECreateAIEWorkgroupOptions options;
808-
// TODO(avarma): In follow-up PRs this will be replaced by a global flag.
809-
// Currently setting as `false`.
810-
options.reprogramDmas = /*reprogramDmas=*/false;
819+
options.reprogramDmas = reprogramDmas;
811820
passManager.addNestedPass<func::FuncOp>(
812821
createAMDAIECreateAIEWorkgroupPass(options));
813822
}
@@ -874,11 +883,30 @@ void addAMDAIEObjectFifoLoweringPasses(
874883

875884
passManager.addPass(createAMDAIENpuDmaToHalfDmaCpyNdPass());
876885
passManager.addPass(createAMDAIEInsertDmaBdChainPass());
877-
passManager.addPass(createAMDAIEFoldDmaWaitsPass());
878-
passManager.addPass(createAMDAIEControlCodeLoweringPass());
886+
// TODO(avarma): With the fold-DMA-waits pass enabled, DMA reprogramming
887+
// currently yields ALL zeroes. To be triaged/fixed later in order to
888+
// relax this constraint and optimize the wait ops.
889+
if (!reprogramDmas) passManager.addPass(createAMDAIEFoldDmaWaitsPass());
890+
891+
{
892+
AMDAIEControlCodeLoweringOptions options;
893+
options.reprogramDmas = reprogramDmas;
894+
passManager.addPass(createAMDAIEControlCodeLoweringPass(options));
895+
}
896+
if (reprogramDmas) {
897+
passManager.addPass(createAMDAIEAssignBDIDsPass());
898+
{
899+
// For Conv ops use basic sequential scheme to avoid numerical error.
900+
// TODO: Find a better working scheme for Conv ops
901+
AMDAIEAssignBufferAddressOptions options;
902+
if (useTilePipeline == TilePassPipeline::ConvDecomposePipeline)
903+
options.allocScheme = AllocScheme::Sequential;
904+
passManager.addPass(createAMDAIEAssignBufferAddressPass(options));
905+
}
906+
}
879907
passManager.addPass(createAMDAIEControlCodeToTransactionPass());
880908

881-
addAMDAIEToAIEPasses(passManager, insertLoopAroundCoreBlock);
909+
addAMDAIEToAIEPasses(passManager, insertLoopAroundCoreBlock, reprogramDmas);
882910

883911
// Now lower using the AIE passes from MLIR-AIE.
884912
addMLIRAIELoweringPasses(passManager, useTilePipeline);

compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ void addAMDAIEObjectFifoLoweringPasses(
2020
bool enableCoalescingLoops, bool enableCollapsingUnitDims,
2121
OutliningStrategy enableFunctionOutlining, int outliningLoopInCallCount,
2222
bool insertLoopAroundCoreBlock, uint32_t numCols, bool emitCtrlPkt,
23-
uint32_t coreStackSize);
23+
uint32_t coreStackSize, bool reprogramDmas);
2424

2525
/// Add passes to lower from MLIR-AIR through AIE. This is
2626
/// currently the default passes used for lowering after IREEs tiling.
@@ -45,7 +45,7 @@ void buildAMDAIETransformPassPipeline(
4545
PacketFlowStrategy packetFlowStrategy, bool enableCoalescingLoops,
4646
bool enableCollapsingUnitDims, OutliningStrategy enableFunctionOutlining,
4747
int outliningLoopInCallCount, bool insertLoopAroundCoreBlock,
48-
bool emitCtrlPkt, uint32_t coreStackSize);
48+
bool emitCtrlPkt, uint32_t coreStackSize, bool reprogramDmas);
4949

5050
/// Populates passes needed to lower the IR via a Pack-Peel based approach.
5151
void addPackPeelBasedPassPipeline(OpPassManager &passManager,
@@ -272,7 +272,7 @@ std::unique_ptr<OperationPass<ModuleOp>> createAMDAIELoweringStrategyPass(
272272
std::unique_ptr<Pass> createAMDAIELowerFuncArgsPass();
273273

274274
/// Create pass to lower from the AMDAIE dialect to the AIE/AIEX dialects.
275-
void addAMDAIEToAIEPasses(OpPassManager &);
275+
void addAMDAIEToAIEPasses(OpPassManager &pm, bool reprogramDmas);
276276
std::unique_ptr<Pass> createAMDAIELowerToAIEPass(
277277
AMDAIELowerToAIEOptions options = {});
278278

0 commit comments

Comments
 (0)