Skip to content

Commit 7e01df8

Browse files
Zero padding on MemTiles (#1874)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 3b5799b commit 7e01df8

File tree

37 files changed

+767
-77
lines changed

37 files changed

+767
-77
lines changed

include/aie/Dialect/AIE/IR/AIEOps.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,7 +1677,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
16771677
OptionalAttr<AIEI32Attr>:$via_shared_mem,
16781678
// repeat_count==1 means "do it once"
16791679
OptionalAttr<ConfinedAttr<AIEI32Attr, [IntMinValue<1>]>>:$repeat_count,
1680-
InitValuesArrayAttr:$initValues
1680+
InitValuesArrayAttr:$initValues,
1681+
OptionalAttr<BDPadLayoutArrayAttr>:$padDimensions
16811682
);
16821683

16831684
let assemblyFormat = [{
@@ -1717,7 +1718,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
17171718
OpBuilder<(ins "mlir::StringAttr":$sym_name, "mlir::Value":$producerTile,
17181719
"mlir::ValueRange":$consumerTiles, "mlir::Attribute":$elemNumber, "mlir::Type":$elem_type,
17191720
CArg<"llvm::ArrayRef<AIE::BDDimLayoutAttr>", "{}">:$dimensionsToStream,
1720-
CArg<"llvm::ArrayRef<AIE::BDDimLayoutArrayAttr>", "{}">:$dimensionsFromStreamPerConsumer), [{
1721+
CArg<"llvm::ArrayRef<AIE::BDDimLayoutArrayAttr>", "{}">:$dimensionsFromStreamPerConsumer,
1722+
CArg<"llvm::ArrayRef<AIE::BDPadLayoutArrayAttr>", "{}">:$padDimensions), [{
17211723
odsState.addOperands(producerTile);
17221724
odsState.addOperands(consumerTiles);
17231725
odsState.addAttribute(getSymNameAttrName(odsState.name), sym_name);

include/aie/Dialect/AIEX/IR/AIEX.td

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,13 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
570570
OptionalAttr<PacketInfoAttr>:$packet,
571571
FlatSymbolRefAttr:$metadata,
572572
I64Attr:$id,
573-
DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token
573+
DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token,
574+
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_before,
575+
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_before,
576+
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_before,
577+
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_after,
578+
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_after,
579+
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_after
574580
);
575581

576582
let assemblyFormat = [{
@@ -828,6 +834,7 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
828834
I32Attr:$d0_stride,
829835
I32Attr:$d1_size,
830836
I32Attr:$d1_stride,
837+
I32Attr:$d2_size,
831838
I32Attr:$d2_stride,
832839
I32Attr:$iteration_current,
833840
I32Attr:$iteration_size,
@@ -840,7 +847,13 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
840847
I32Attr:$lock_rel_id,
841848
I32Attr:$lock_acq_enable,
842849
I32Attr:$lock_acq_val,
843-
I32Attr:$lock_acq_id
850+
I32Attr:$lock_acq_id,
851+
I32Attr:$d0_zero_before,
852+
I32Attr:$d1_zero_before,
853+
I32Attr:$d2_zero_before,
854+
I32Attr:$d0_zero_after,
855+
I32Attr:$d1_zero_after,
856+
I32Attr:$d2_zero_after
844857
);
845858
let results = (outs );
846859
let assemblyFormat = [{ attr-dict }];

lib/Dialect/AIE/IR/AIEDialect.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1941,11 +1941,11 @@ LogicalResult DMABDOp::verify() {
19411941
if (!dims.has_value())
19421942
return emitOpError() << "Padding requires n-d data layouts expressed as"
19431943
<< " wrap(s) and stride(s).";
1944+
if (!targetModel.isMemTile(parentTileId.col, parentTileId.row))
1945+
return emitOpError() << "Padding is only supported by memtile dma bds.";
19441946
if (dims->size() != paddims->size())
19451947
return emitOpError() << "Mismatch number of dimensions between padding(s)"
19461948
<< " and wrap(s) and stride(s).";
1947-
if (!targetModel.isMemTile(parentTileId.col, parentTileId.row))
1948-
return emitOpError() << "Padding is only supported by memtile dma bds.";
19491949
int actuallen = 1;
19501950
for (unsigned i = 0; i < paddims->size(); i++) {
19511951
auto dim = (*dims)[i];

lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -513,14 +513,19 @@ struct AIEObjectFifoStatefulTransformPass
513513
void createBd(OpBuilder &builder, LockOp acqLock, int acqMode,
514514
LockAction acqLockAction, LockOp relLock, int relMode,
515515
MyOp buff, int offset, int len, Block *succ,
516-
BDDimLayoutArrayAttr dims) {
516+
BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions) {
517517
if (acqLock)
518518
builder.create<UseLockOp>(builder.getUnknownLoc(), acqLock, acqLockAction,
519519
acqMode);
520-
if (!dims.getValue().empty())
520+
521+
if (!dims.getValue().empty() && padDimensions) {
522+
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims,
523+
padDimensions);
524+
} else if (!dims.getValue().empty()) {
521525
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims);
522-
else
526+
} else {
523527
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len);
528+
}
524529
if (acqLock)
525530
builder.create<UseLockOp>(builder.getUnknownLoc(), relLock,
526531
LockAction::Release, relMode);
@@ -534,7 +539,8 @@ struct AIEObjectFifoStatefulTransformPass
534539
void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode,
535540
int acqNum, int relNum, MyOp buff, int offset, int len,
536541
DMAChannelDir channelDir, size_t blockIndex, Block *succ,
537-
BDDimLayoutArrayAttr dims) {
542+
BDDimLayoutArrayAttr dims,
543+
BDPadLayoutArrayAttr padDimensions) {
538544
LockOp acqLock;
539545
LockOp relLock;
540546
int acqMode = 1;
@@ -559,20 +565,23 @@ struct AIEObjectFifoStatefulTransformPass
559565
}
560566
}
561567
createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff,
562-
offset, len, succ, dims);
568+
offset, len, succ, dims, padDimensions);
563569
}
564570

565571
/// Function that either calls createAIETileDMA(), createShimDMA() or
566572
/// createMemTileDMA() based on op tile row value.
567573
void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op,
568574
DMAChannelDir channelDir, int channelIndex, int lockMode,
569-
BDDimLayoutArrayAttr dims) {
575+
BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims) {
570576
if (op.getProducerTileOp().isShimTile()) {
571577
createShimDMA(device, builder, op, channelDir, channelIndex, lockMode,
572578
dims);
573579
} else if (op.getProducerTileOp().isMemTile()) {
580+
BDPadLayoutArrayAttr padDims = nullptr;
581+
if (channelDir == DMAChannelDir::MM2S && pad_dims)
582+
padDims = pad_dims;
574583
createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
575-
dims);
584+
dims, padDims);
576585
} else {
577586
createAIETileDMA(device, builder, op, channelDir, channelIndex, lockMode,
578587
dims);
@@ -669,7 +678,7 @@ struct AIEObjectFifoStatefulTransformPass
669678
builder.setInsertionPointToStart(curr);
670679
createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
671680
buffersPerFifo[target][blockIndex], /*offset*/ 0,
672-
len, channelDir, blockIndex, succ, dims);
681+
len, channelDir, blockIndex, succ, dims, nullptr);
673682
curr = succ;
674683
blockIndex++;
675684
}
@@ -745,7 +754,7 @@ struct AIEObjectFifoStatefulTransformPass
745754
createBdBlock<ExternalBufferOp>(builder, op, lockMode, acqNum, relNum,
746755
externalBuffersPerFifo[op][blockIndex],
747756
/*offset*/ 0, len, channelDir, blockIndex,
748-
succ, dims);
757+
succ, dims, nullptr);
749758
curr = succ;
750759
blockIndex++;
751760
}
@@ -756,7 +765,8 @@ struct AIEObjectFifoStatefulTransformPass
756765
void createMemTileDMA(DeviceOp &device, OpBuilder &builder,
757766
ObjectFifoCreateOp op, DMAChannelDir channelDir,
758767
int channelIndex, int lockMode,
759-
BDDimLayoutArrayAttr dims) {
768+
BDDimLayoutArrayAttr dims,
769+
BDPadLayoutArrayAttr padDimensions) {
760770
size_t numBlocks = op.size();
761771
if (numBlocks == 0)
762772
return;
@@ -898,7 +908,8 @@ struct AIEObjectFifoStatefulTransformPass
898908
offset = extraOffset;
899909
createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
900910
buffersPerFifo[target][blockIndex], offset,
901-
lenOut, channelDir, blockIndex, succ, dims);
911+
lenOut, channelDir, blockIndex, succ, dims,
912+
padDimensions);
902913
curr = succ;
903914
blockIndex++;
904915
}
@@ -1361,7 +1372,6 @@ struct AIEObjectFifoStatefulTransformPass
13611372
auto consumerWireType = WireBundle::DMA;
13621373
std::set<TileOp>
13631374
objectFifoTiles; // track cores to check for loops during unrolling
1364-
13651375
//===------------------------------------------------------------------===//
13661376
// Split objectFifos into a consumer end and producer end if needed
13671377
//===------------------------------------------------------------------===//
@@ -1511,7 +1521,8 @@ struct AIEObjectFifoStatefulTransformPass
15111521
DMAChannel producerChan =
15121522
dmaAnalysis.getMasterDMAChannel(producer.getProducerTile());
15131523
createDMA(device, builder, producer, producerChan.direction,
1514-
producerChan.channel, 0, producer.getDimensionsToStreamAttr());
1524+
producerChan.channel, 0, producer.getDimensionsToStreamAttr(),
1525+
producer.getPadDimensionsAttr());
15151526
// generate objectFifo allocation info
15161527
builder.setInsertionPoint(&device.getBody()->back());
15171528

@@ -1529,7 +1540,7 @@ struct AIEObjectFifoStatefulTransformPass
15291540
BDDimLayoutArrayAttr consumerDims =
15301541
consumer.getDimensionsFromStreamPerConsumer()[0];
15311542
createDMA(device, builder, consumer, consumerChan.direction,
1532-
consumerChan.channel, 1, consumerDims);
1543+
consumerChan.channel, 1, consumerDims, nullptr);
15331544
// generate objectFifo allocation info
15341545
builder.setInsertionPoint(&device.getBody()->back());
15351546

lib/Dialect/AIEX/IR/AIEXDialect.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,13 @@ LogicalResult AIEX::NpuWriteBdOp::verify() {
459459
return emitOpError("Iteration Size exceeds the [0:63] range.");
460460
if (getIterationStride() > 0xFFFFF)
461461
return emitOpError("Iteration Stride exceeds the [0:1M-1] range.");
462+
if (targetModel.isShimNOCTile(getColumn(), getRow()) && getD2Size() != 0)
463+
return emitOpError("ShimTile only supports 3 dimensions of sizes.");
464+
if (targetModel.isShimNOCTile(getColumn(), getRow()) &&
465+
(getD0ZeroBefore() != 0 || getD0ZeroAfter() != 0 ||
466+
getD1ZeroBefore() != 0 || getD1ZeroAfter() != 0 ||
467+
getD2ZeroBefore() != 0 || getD2ZeroAfter() != 0))
468+
return emitOpError("ShimTile doesn't support zero padding.");
462469
return success();
463470
}
464471

lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
129129
SmallVector<Value>{}, SmallVector<Value>{},
130130
SmallVector<Value>{}, ArrayRef(staticOffsets),
131131
ArrayRef(staticSizes), ArrayRef(staticStrides),
132-
controllerIdPkt, metadata, 0, true);
132+
controllerIdPkt, metadata, 0, true, 0, 0, 0, 0, 0, 0);
133133

134134
auto shimRow = builder.getI32IntegerAttr(0);
135135
auto shimCol = builder.getI32IntegerAttr(col);

lib/Dialect/AIEX/Transforms/AIEDMATasksToNPU.cpp

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
216216
}
217217

218218
LogicalResult rewriteSingleBD(OpBuilder &builder, Block &block,
219-
AIE::TileOp &tile) {
219+
AIE::TileOp &tile,
220+
AIE::DMAChannelDir channelDir) {
220221
AIE::DMABDOp bd_op = getBdForBlock(block);
221222
const auto &target_model = AIE::getTargetModel(bd_op);
222223
MemRefType buffer_type = bd_op.getBuffer().getType();
@@ -237,12 +238,23 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
237238
<< len << " bytes falls below minimum hardware transfer unit of "
238239
<< (addr_granularity / 8) << " bytes.";
239240
}
240-
241241
// Process strides/wraps
242242
std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
243243
bd_op.getDimensions();
244244
llvm::SmallVector<int64_t, 4> sizes = llvm::SmallVector<int64_t, 4>(4, 0);
245245
llvm::SmallVector<int64_t, 4> strides = llvm::SmallVector<int64_t, 4>(4, 0);
246+
247+
// Padding
248+
std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
249+
bd_op.getPadDimensions();
250+
llvm::SmallVector<int64_t, 4> padBefore =
251+
llvm::SmallVector<int64_t, 4>(4, 0);
252+
llvm::SmallVector<int64_t, 4> padAfter =
253+
llvm::SmallVector<int64_t, 4>(4, 0);
254+
std::fill(padBefore.begin(), padBefore.end(), 0);
255+
std::fill(padAfter.begin(), padAfter.end(), 0);
256+
int d2size = 0;
257+
246258
if (dims && dims->size() > 0) {
247259
llvm::SmallVector<int64_t, 4> input_sizes =
248260
llvm::SmallVector<int64_t, 4>(4, 1);
@@ -252,6 +264,7 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
252264
return bd_op->emitOpError("At most four data layout transformation "
253265
"dimensions may be provided.");
254266
}
267+
255268
for (size_t i = 0; i < dims->size(); i++) {
256269
// Pass down dimensions in reverse order; in the MLIR, this allows
257270
// us to specify step sizes/wraps in the same order as we would
@@ -260,6 +273,33 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
260273
input_sizes[i] = (*dims)[j].getSize();
261274
input_strides[i] = (*dims)[j].getStride();
262275
}
276+
if (dims->size() > 2) {
277+
d2size = (target_model.isMemTile(tile.getCol(), tile.getRow()))
278+
? (*dims)[2].getSize()
279+
: 0;
280+
}
281+
if (padDims.has_value()) {
282+
if (!target_model.isMemTile(tile.getCol(), tile.getRow()))
283+
return bd_op->emitOpError()
284+
<< "Padding is only supported by memtile dma bds.";
285+
if (padDims->size() > dims->size())
286+
return bd_op->emitOpError()
287+
<< "Mismatch number of dimensions between padding(s)"
288+
<< " and wrap(s) and stride(s).";
289+
if (channelDir == AIE::DMAChannelDir::MM2S) {
290+
for (size_t i = 0; i < padDims->size(); i++) {
291+
int j = padDims->size() - i - 1;
292+
padBefore[i] = (*padDims)[j].getConstPadBefore();
293+
padAfter[i] = (*padDims)[j].getConstPadAfter();
294+
}
295+
for (size_t i = padDims->size(); i < dims->size(); i++) {
296+
padBefore[i] = 0;
297+
padAfter[i] = 0;
298+
}
299+
} else
300+
return bd_op->emitOpError()
301+
<< "supports padding only for MM2S direction on MemTiles.";
302+
}
263303
getHardwareStridesWraps(target_model, buffer_type, input_sizes,
264304
input_strides, sizes, strides);
265305
if (failed(verifyStridesWraps(bd_op, buffer_type, tile.getCol(),
@@ -290,8 +330,16 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
290330
"transfer length, as this is the BD repeat count.";
291331
return failure();
292332
}
333+
} else {
334+
if (padDims && target_model.isMemTile(tile.getCol(), tile.getRow()) &&
335+
channelDir == AIE::DMAChannelDir::MM2S) {
336+
return bd_op->emitOpError()
337+
<< "Padding requires n-d data layouts expressed as "
338+
<< "wrap(s) and stride(s).";
339+
} else if (padDims) {
340+
return bd_op->emitOpError() << "Padding is supported only on MemTiles.";
341+
}
293342
}
294-
295343
// find next BD ID, if any
296344
uint32_t use_next_bd = 0;
297345
uint32_t next_bd_id = 0;
@@ -306,7 +354,7 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
306354
/* TODO: Strides/Wraps */
307355
/*d0_size=*/sizes[0], /*d0_stride=*/strides[0],
308356
/*d1_size=*/sizes[1], /*d1_stride=*/strides[1],
309-
/*d2_stride=*/strides[2],
357+
/*d2_size=*/d2size, /*d2_stride=*/strides[2],
310358
/*iteration_current=*/0, /*iteration_size=*/sizes[3],
311359
/*iteration_stride=*/strides[3],
312360
/* TODO: Next BD */
@@ -316,7 +364,10 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
316364
/*valid_bd=*/1,
317365
/* TODO: Locks */
318366
/*lock_rel_val=*/0, /*lock_rel_id=*/0, /*lock_acq_enable=*/0,
319-
/*lock_acq_val=*/0, /*lock_ackq_id=*/0);
367+
/*lock_acq_val=*/0, /*lock_acq_id=*/0, /*d0_zero_before=*/padBefore[0],
368+
/*d1_zero_before=*/padBefore[1], /*d2_zero_before=*/padBefore[2],
369+
/*d0_zero_after=*/padAfter[0], /*d1_zero_after=*/padAfter[1],
370+
/*d2_zero_after=*/padAfter[2]);
320371

321372
return setAddressForSingleBD(builder, bd_op, tile);
322373
}
@@ -392,13 +443,15 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
392443
return failure();
393444
}
394445

446+
auto channelDir = op.getDirection();
447+
395448
// Lower all BDs
396449
for (auto it = body.begin(); it != body.end(); ++it) {
397450
Block &block = *it;
398451
if (shouldSkipBlock(block)) {
399452
continue;
400453
}
401-
if (failed(rewriteSingleBD(builder, block, tile))) {
454+
if (failed(rewriteSingleBD(builder, block, tile, channelDir))) {
402455
return failure();
403456
}
404457
}

0 commit comments

Comments
 (0)