diff --git a/mlir/lib/Conversion/AIRToAIEPass.cpp b/mlir/lib/Conversion/AIRToAIEPass.cpp index 5764536eb..02b4e2bdb 100644 --- a/mlir/lib/Conversion/AIRToAIEPass.cpp +++ b/mlir/lib/Conversion/AIRToAIEPass.cpp @@ -58,6 +58,12 @@ struct AIRToAIEConversionOptions { AIE::AIEDevice device; }; +struct link_ends_interface { + int numLinkEnds; + std::vector> input_ofs_with_offsets; + std::vector> output_ofs_with_offsets; +}; + // get memcpy operation volumn (elements) as int int getMemcpySizesAsInt(Value memref, SmallVector sizes) { MemRefType memTy = llvm::cast(memref.getType()); @@ -95,8 +101,7 @@ struct ShimTileAllocator { for (int i = 0, e = aie_target.columns(); i < e; i++) { if (aie_target.isShimNOCTile(i, 0)) { shim_columns.push_back(i); - shim_dma_channels = aie_target.getNumDestSwitchboxConnections( - i, 0, AIE::WireBundle::FIFO); + shim_dma_channels = 2; } } } @@ -1155,14 +1160,15 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { LowerAIRChannelsPattern( MLIRContext *ctx, ShimTileAllocator &shimTileAlloc, std::map &bufferToMemtileMap, - std::map &linksToComplete) + std::map &linkEnds) : OpRewritePattern(ctx), shimTileAlloc(shimTileAlloc), - bufferToMemtileMap(bufferToMemtileMap), - linksToComplete(linksToComplete) {} + bufferToMemtileMap(bufferToMemtileMap), linkEnds(linkEnds) {} LogicalResult matchAndRewrite(air::ChannelOp channel, PatternRewriter &rewriter) const override { auto device = channel->getParentOfType(); + const auto &target_model = device.getTargetModel(); + bool isAIE2 = (target_model.getTargetArch() == AIE::AIEArch::AIE2); auto ctx = device->getContext(); if (!device) return failure(); @@ -1171,21 +1177,24 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { if (channel.getBundleSize() > 1) return failure(); - AIE::AIEObjectFifoType datatype; + std::pair datatype = {(int)air::MemorySpace::L3, {}}; std::vector channelPuts = getChannelPutOpThroughSymbol(channel, device); std::vector channelGets = getChannelGetOpThroughSymbol(channel, device); - channel->print(llvm::outs()); - llvm::outs() << "channelPuts" << channelPuts.size() << "\n"; - llvm::outs() << "channelGets" << channelGets.size() << "\n"; + // channel->print(llvm::outs()); + // llvm::outs() << "channelPuts" << channelPuts.size() << "\n"; + // llvm::outs() << "channelGets" << channelGets.size() << "\n"; - // keep track of potential LinkOp - bool linkToComplete = - false; // track if objFifo has to be added to linksToComplete - bool linkFound = false; // all ends of a link have been found - Operation *endOfLink; // one end of a link + // variables to track LinkOp, i.e., a put and get using the same + // AIE.BufferOp + bool linkFound = false; // a link end is found + Operation *endOfLink = nullptr; // one end of a LinkOp (i.e., a put or get) + int numLinkEnds = 0; // # ends in this link (i.e., # users of AIE.BufferOp) + + AIE::BDDimLayoutArrayAttr dimensionsToStream = + AIE::BDDimLayoutArrayAttr::get(channel->getContext(), {}); // put/get come in pairs, if one is missing then it's L3 Value producerTile; @@ -1198,26 +1207,45 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { if (res.failed()) return res; + setChannelBufferResources(rewriter, channel, + channelPuts[0].getOperation()); + // check if this put is linked to a get from another channel - MemRefType memref = + MemRefType memrefType = llvm::cast(channelPuts[0].getMemref().getType()); - int mem_space = memref.getMemorySpaceAsInt(); + int mem_space = memrefType.getMemorySpaceAsInt(); if (mem_space == (int)air::MemorySpace::L2) { - if (linksToComplete.find(channelPuts[0].getOperation()) != - linksToComplete.end()) { + linkFound = + detectLinkEnd(rewriter, channelPuts[0], &numLinkEnds); + if (linkFound) endOfLink = channelPuts[0].getOperation(); - linkFound = true; - } else { - AIE::BufferOp buff = dyn_cast( - channelPuts[0].getMemref().getDefiningOp()); - for (auto user : buff->getUsers()) { - if (auto pairedGet = dyn_cast(user)) { - endOfLink = pairedGet.getOperation(); - linkToComplete = true; - } - } - } } + + // get data layout transformation on channel put + auto ndcpy = cast(channelPuts[0].getOperation()); + SmallVector sizes = isTileInbound(ndcpy, (int)air::MemorySpace::L1) + ? ndcpy.getDstSizes() + : ndcpy.getSrcSizes(); + SmallVector strides = + isTileInbound(ndcpy, (int)air::MemorySpace::L1) + ? ndcpy.getDstStrides() + : ndcpy.getSrcStrides(); + if (!strides.empty() && !sizes.empty()) + if (auto const_highest_stride = getConstantIntValue(strides[0])) + if (*const_highest_stride == 0) { + strides.erase(strides.begin()); + sizes.erase(sizes.begin()); + } + std::vector dims = + getWrapsAndStrides(sizes, strides, ndcpy->getContext()); + auto wraps_and_strides = + AIE::BDDimLayoutArrayAttr::get(ndcpy->getContext(), ArrayRef(dims)); + bool useDefaultDataAccessPattern = + isAIE2 ? isDefaultDataAccessPattern(sizes, strides, + channelPuts[0].getMemref()) + : true; + if (!wraps_and_strides.getValue().empty() && !useDefaultDataAccessPattern) + dimensionsToStream = wraps_and_strides; } else { // put from L3 producerTile = shimTileAlloc.getShimTile( @@ -1225,8 +1253,10 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { channel.getName().str()); } + std::vector dimsFromStreamPerConsumer; + // put/get come in pairs, if one is missing then it's L3 - std::vector consumers; + std::set consumers; Value consumerTile; if (channelGets.size() > 1 && !channel.isBroadcast()) return channel.emitOpError("has multiple gets but no broadcast shape"); @@ -1237,62 +1267,96 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { findChannelPutGetTile(get, &consumerTile, &datatype); if (res.failed()) return res; - consumers.push_back(consumerTile); + consumers.insert(consumerTile.getDefiningOp()); + + setChannelBufferResources(rewriter, channel, get.getOperation()); // check if this get is linked to a put from another channel - MemRefType memref = llvm::cast(get.getMemref().getType()); - int mem_space = memref.getMemorySpaceAsInt(); + MemRefType memrefType = llvm::cast(get.getMemref().getType()); + int mem_space = memrefType.getMemorySpaceAsInt(); if (mem_space == (int)air::MemorySpace::L2) { - if (linksToComplete.find(get.getOperation()) != linksToComplete.end()) { + linkFound = detectLinkEnd(rewriter, get, &numLinkEnds); + if (linkFound) endOfLink = get.getOperation(); - linkFound = true; - } else { - AIE::BufferOp buff = - dyn_cast(get.getMemref().getDefiningOp()); - for (auto user : buff->getUsers()) { - if (auto pairedPut = dyn_cast(user)) { - endOfLink = pairedPut.getOperation(); - linkToComplete = true; - } - } - } } + + // get data layout transformation on channel get + auto ndcpy = cast(get.getOperation()); + SmallVector sizes = isTileInbound(ndcpy, (int)air::MemorySpace::L1) + ? ndcpy.getDstSizes() + : ndcpy.getSrcSizes(); + SmallVector strides = + isTileInbound(ndcpy, (int)air::MemorySpace::L1) + ? ndcpy.getDstStrides() + : ndcpy.getSrcStrides(); + if (!strides.empty() && !sizes.empty()) + if (auto const_highest_stride = getConstantIntValue(strides[0])) + if (*const_highest_stride == 0) { + strides.erase(strides.begin()); + sizes.erase(sizes.begin()); + } + std::vector dims = + getWrapsAndStrides(sizes, strides, ndcpy->getContext()); + auto wraps_and_strides = + AIE::BDDimLayoutArrayAttr::get(ndcpy->getContext(), ArrayRef(dims)); + bool useDefaultDataAccessPattern = + isAIE2 ? isDefaultDataAccessPattern(sizes, strides, get.getMemref()) + : true; + if (!wraps_and_strides.getValue().empty() && !useDefaultDataAccessPattern) + dimsFromStreamPerConsumer.push_back(wraps_and_strides); } for (int i = 0; i < expectedGets - (int)channelGets.size(); i++) { // get from L3 consumerTile = shimTileAlloc.getShimTile( device, (int)air::MemorySpace::L1, (int)air::MemorySpace::L3, channel.getName().str()); - consumers.push_back(consumerTile); + consumers.insert(consumerTile.getDefiningOp()); } + if ((int)consumers.size() != expectedGets) + return channel.emitOpError( + "number of channel gets does not match broadcast shape"); - if (!datatype) - return failure(); + if (datatype.first == (int)air::MemorySpace::L3) + return channel.emitOpError( + "could not infer datatype of Object FIFO elements"); // create objFifo rewriter.setInsertionPoint(*(device.getOps().begin())); + AIE::BDDimLayoutArrayAttr emptyDims = + AIE::BDDimLayoutArrayAttr::get(channel->getContext(), {}); + auto dimensionsFromStreamPerConsumer = AIE::BDDimLayoutArrayArrayAttr::get( + channel->getContext(), ArrayRef(emptyDims)); + if (dimsFromStreamPerConsumer.size() > 0) + dimensionsFromStreamPerConsumer = AIE::BDDimLayoutArrayArrayAttr::get( + channel->getContext(), ArrayRef(dimsFromStreamPerConsumer)); + std::vector consumerTiles; + for (auto tile : consumers) + consumerTiles.push_back(tile); AIE::ObjectFifoCreateOp objFifo = createObjectFifo( - rewriter, datatype, producerTile, consumers, - channel.getBufferResources(), "air_" + channel.getName().str()); + rewriter, datatype.second, producerTile, consumerTiles, + channel.getBufferResources(), "air_" + channel.getName().str(), + dimensionsToStream, dimensionsFromStreamPerConsumer); - // if this channel's get is linked with another put, register it - if (linkToComplete) - linksToComplete[endOfLink] = objFifo; - // once the corresponding objFifo has been made, complete the link + // if a link end was found if (linkFound) { - AIE::ObjectFifoCreateOp producerFifo = linksToComplete[endOfLink]; - if (isa(endOfLink)) - rewriter.create( - rewriter.getUnknownLoc(), - rewriter.getArrayAttr({SymbolRefAttr::get(ctx, objFifo.name())}), - rewriter.getArrayAttr( - {SymbolRefAttr::get(ctx, producerFifo.name())})); - else - rewriter.create( - rewriter.getUnknownLoc(), - rewriter.getArrayAttr( - {SymbolRefAttr::get(ctx, producerFifo.name())}), - rewriter.getArrayAttr({SymbolRefAttr::get(ctx, objFifo.name())})); + AIE::BufferOp buff; + // if get: add to input objectFifo vector + if (auto get = dyn_cast(endOfLink)) { + buff = dyn_cast(get.getMemref().getDefiningOp()); + SmallVector offsets = get.getDstOffsets(); + SmallVector strides = get.getDstStrides(); + int64_t offset = get1DOffset(offsets, strides); + addLinkEnd(ctx, buff, /* isInput */ true, numLinkEnds, objFifo, offset); + // if put: add to output objectFifo vector + } else if (auto put = dyn_cast(endOfLink)) { + buff = dyn_cast(put.getMemref().getDefiningOp()); + SmallVector offsets = put.getSrcOffsets(); + SmallVector strides = put.getSrcStrides(); + int64_t offset = get1DOffset(offsets, strides); + addLinkEnd(ctx, buff, /* isInput */ false, numLinkEnds, objFifo, + offset); + } + createLink(rewriter, buff); } // replace put/get and any associated memref alloc/dealloc @@ -1348,12 +1412,16 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { private: // find AIE cores and their tiles based on memory hierarchy levels template - LogicalResult findChannelPutGetTile(MyOp op, Value *tile, - AIE::AIEObjectFifoType *datatype) const { + LogicalResult + findChannelPutGetTile(MyOp op, Value *tile, + std::pair *datatype) const { MemRefType memref = llvm::cast(op.getMemref().getType()); int mem_space = memref.getMemorySpaceAsInt(); - *datatype = AIE::AIEObjectFifoType::get( - MemRefType::get(memref.getShape(), memref.getElementType())); + // remove mem_space from memref for objFifo datatype + if (datatype->first != (int)air::MemorySpace::L1) { + *datatype = {mem_space, memref}; + //MemRefType::get(memref.getShape(), memref.getElementType())}; + } if (mem_space == (int)air::MemorySpace::L1) { AIE::CoreOp core = op->template getParentOfType(); if (!core) @@ -1376,16 +1444,29 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { } } - AIE::ObjectFifoCreateOp createObjectFifo(OpBuilder &builder, - AIE::AIEObjectFifoType datatype, - Value prodTile, - const std::vector &consTile, - int depth, StringRef name) const { - AIE::ObjectFifoCreateOp fifo = builder.create( - builder.getUnknownLoc(), builder.getStringAttr(name), prodTile, - consTile, builder.getIntegerAttr(builder.getI32Type(), depth), - datatype); - return fifo; + void setChannelBufferResources(PatternRewriter &rewriter, + air::ChannelOp channel, Operation *op) const { + if (channel->hasAttr("buffer_resources")) + return; + auto for_op = op->getParentOfType(); + if (!for_op) + return; + if (for_op->hasAttr("unroll")) { + auto unroll_factor = for_op->getAttrOfType("unroll"); + channel->setAttr("buffer_resources", unroll_factor); + } + } + + AIE::ObjectFifoCreateOp createObjectFifo( + PatternRewriter &rewriter, MemRefType datatype, Value prodTile, + const std::vector &consTile, int depth, StringRef name, + AIE::BDDimLayoutArrayAttr dimensionsToStream, + AIE::BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer) const { + return rewriter.create( + rewriter.getUnknownLoc(), rewriter.getStringAttr(name), prodTile, + consTile, rewriter.getIntegerAttr(rewriter.getI32Type(), depth), + AIE::AIEObjectFifoType::get(datatype), dimensionsToStream, + dimensionsFromStreamPerConsumer); } template @@ -1445,9 +1526,96 @@ struct LowerAIRChannelsPattern : public OpRewritePattern { } } + template + bool detectLinkEnd(PatternRewriter &rewriter, MyOp op, + int *numLinkEnds) const { + // check if this put is linked to a get from another channel that... + bool found = false; + int numEnds = 0; + AIE::BufferOp buff = + dyn_cast(op.getMemref().getDefiningOp()); + // ... has already been found in another channel + if (linkEnds.find(buff) != linkEnds.end()) { + return true; + // ... has not been found yet + } else { + for (auto user : buff->getUsers()) { + if (isa(op)) { + if (isa(user)) + found = true; + if (isa(user) || isa(user)) + numEnds++; + } else if (isa(op)) { + if (isa(user)) + found = true; + if (isa(user) || isa(user)) + numEnds++; + } + } + } + *numLinkEnds = numEnds; + return found; + } + + void addLinkEnd(MLIRContext *ctx, AIE::BufferOp buff, bool isInput, + int numLinkEnds, AIE::ObjectFifoCreateOp objFifo, + int64_t offset) const { + if (linkEnds.find(buff) == linkEnds.end()) { + std::vector> input_ofs; + std::vector> output_ofs; + linkEnds[buff] = {numLinkEnds, input_ofs, output_ofs}; + } + if (isInput) + linkEnds[buff].input_ofs_with_offsets.push_back( + {SymbolRefAttr::get(ctx, objFifo.name()), offset}); + else + linkEnds[buff].output_ofs_with_offsets.push_back( + {SymbolRefAttr::get(ctx, objFifo.name()), offset}); + } + + static bool sortLinkObjectFifos(std::pair op0, + std::pair op1) { + return op0.second < op1.second; + } + + void createLink(PatternRewriter &rewriter, AIE::BufferOp buff) const { + auto numEnds = linkEnds[buff].numLinkEnds; + auto input_pairs = linkEnds[buff].input_ofs_with_offsets; + auto output_pairs = linkEnds[buff].output_ofs_with_offsets; + std::vector input_ofs; + std::vector output_ofs; + //std::sort(input_pairs.begin(), input_pairs.end(), sortLinkObjectFifos); + //std::sort(output_pairs.begin(), output_pairs.end(), sortLinkObjectFifos); + // retrieve only objectFifo symbol ref attributes + std::vector srcOffsets; + std::vector dstOffsets; + for (auto p : input_pairs) { + input_ofs.push_back(p.first); + auto offset = rewriter.create( + rewriter.getUnknownLoc(), rewriter.getIndexAttr(p.second)); + srcOffsets.push_back(offset->getResult(0)); + } + for (auto p : output_pairs) { + output_ofs.push_back(p.first); + auto offset = rewriter.create( + rewriter.getUnknownLoc(), rewriter.getIndexAttr(p.second)); + dstOffsets.push_back(offset->getResult(0)); + } + // check if all ends have been found + // if yes, create ObjectFifoLinkOp + if ((int)input_ofs.size() + (int)output_ofs.size() == numEnds) { + rewriter.create( + rewriter.getUnknownLoc(), rewriter.getArrayAttr(ArrayRef(input_ofs)), + rewriter.getArrayAttr(ArrayRef(output_ofs)), + srcOffsets, dstOffsets); + } + } + ShimTileAllocator &shimTileAlloc; std::map &bufferToMemtileMap; - std::map &linksToComplete; + std::map &linkEnds; + // map L2 AIE.BufferOps to pairs of vectors of SymbolRefAttr + // () }; // This function replaces ChannelPutOp/ChannelGetOp with AIE_CreateObjectFifoOps @@ -1459,9 +1627,9 @@ void lowerAIRChannels( std::map &bufferToMemtileMap) { auto ctx = d->getContext(); RewritePatternSet patterns(ctx); - std::map linksToComplete; + std::map linkEnds; patterns.insert(ctx, s, bufferToMemtileMap, - linksToComplete); + linkEnds); (void)applyPatternsAndFoldGreedily(d, std::move(patterns)); } @@ -3084,10 +3252,10 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { builder.getUnknownLoc(), AIE::AIEDeviceAttr::get(builder.getContext(), *device)); ShimTileAllocator shimTileAlloc(deviceOp.getTargetModel()); - std::map linksToComplete; + std::map linkEnds; if (clTestPatterns.find("lower-air-channels") != std::string::npos) { - patterns.insert( - ctx, shimTileAlloc, bufferToMemtileMap, linksToComplete); + patterns.insert(ctx, shimTileAlloc, + bufferToMemtileMap, linkEnds); } if (clTestPatterns.find("lower-air-ping-pong") != std::string::npos) { patterns.insert(ctx); diff --git a/mlir/test/Conversion/AIRToAIE/air_channel_to_objectfifo_distribute.mlir b/mlir/test/Conversion/AIRToAIE/air_channel_to_objectfifo_distribute.mlir new file mode 100755 index 000000000..b78bd8572 --- /dev/null +++ b/mlir/test/Conversion/AIRToAIE/air_channel_to_objectfifo_distribute.mlir @@ -0,0 +1,85 @@ +//===- air_channel_to_objectfifo_distribute.mlir ----------------*- MLIR -*-===// +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: MIT +// +//===----------------------------------------------------------------------===// + +// RUN: air-opt %s -air-place-herds='num-rows=2 num-cols=2 row-anchor=3 col-anchor=5' --air-to-aie='use-objectfifo=true device=xcve2802' --canonicalize | FileCheck %s + +// CHECK-LABEL: aie.device(xcve2802) { +// CHECK: %[[VAL_0:.*]] = aie.tile(2, 0) +// CHECK: %[[VAL_1:.*]] = aie.tile(1, 1) +// CHECK: %[[VAL_2:.*]] = aie.tile(5, 3) +// CHECK: %[[VAL_3:.*]] = aie.tile(5, 4) +// CHECK: aie.objectfifo @air_channel_3(%[[VAL_1]], {%[[VAL_3]]}, 1 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @air_channel_2(%[[VAL_1]], {%[[VAL_2]]}, 1 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @air_channel_0(%[[VAL_0]], {%[[VAL_1]]}, 1 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo.link [@air_channel_0] -> [@air_channel_2, @air_channel_3]() +// CHECK: %[[VAL_4:.*]] = aie.core(%[[VAL_3]]) { +// CHECK: %[[VAL_5:.*]] = aie.objectfifo.acquire @air_channel_3(Consume, 1) : !aie.objectfifosubview> +// CHECK: %[[VAL_6:.*]] = aie.objectfifo.subview.access %[[VAL_5]][0] : !aie.objectfifosubview> -> memref<16xi32> +// CHECK: aie.objectfifo.release @air_channel_3(Consume, 1) +// CHECK: aie.end +// CHECK: } {elf_file = "segment_0_core_5_4.elf"} +// CHECK: %[[VAL_5:.*]] = aie.core(%[[VAL_2]]) { +// CHECK: %[[VAL_6:.*]] = aie.objectfifo.acquire @air_channel_2(Consume, 1) : !aie.objectfifosubview> +// CHECK: %[[VAL_7:.*]] = aie.objectfifo.subview.access %[[VAL_6]][0] : !aie.objectfifosubview> -> memref<16xi32> +// CHECK: aie.objectfifo.release @air_channel_2(Consume, 1) +// CHECK: aie.end +// CHECK: } {elf_file = "segment_0_core_5_3.elf"} +// CHECK: } + +module { + air.channel @channel_0 [1, 1] + air.channel @channel_1 [1, 2] + func.func @L2toL1(%arg0: memref<32xi32>) { + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + air.launch (%arg1, %arg2) in (%arg3=%c1, %arg4=%c2) args(%arg5=%arg0) : memref<32xi32> attributes {id = 1 : i32} { + %async_token, %results = air.execute -> (memref<32xi32>) { + %alloc = memref.alloc() {alignment = 32 : i64} : memref<32xi32> + air.execute_terminator %alloc : memref<32xi32> + } + %async_token_0 = air.execute [%async_token] { + memref.copy %arg5, %results : memref<32xi32> to memref<32xi32> + } + %0 = air.wait_all async + %1 = air.channel.put async [%async_token_0, %0] @channel_0[] (%results[] [] []) {id = 2 : i32} : (memref<32xi32>) + + %2 = air.segment async args(%arg6=%arg1, %arg7=%arg2) : index, index attributes {id = 3 : i32} { + %3 = air.wait_all async + %async_token_1, %results_1 = air.execute -> (memref<32xi32, 1>) { + %alloc1 = memref.alloc() : memref<32xi32, 1> + air.execute_terminator %alloc1 : memref<32xi32, 1> + } + %c0_2 = arith.constant 0 : index + %c1_2 = arith.constant 1 : index + %c2_2 = arith.constant 2 : index + %c16 = arith.constant 16 : index + %c32 = arith.constant 32 : index + %4 = air.channel.get async [%3] @channel_0[] (%results_1[] [] []) {id = 4 : i32} : (memref<32xi32, 1>) + %5 = air.wait_all async [%4] + %6 = air.channel.put async [%5] @channel_1[%c0_2, %c0_2] (%results_1[%c0_2] [%c16] []) {id = 5 : i32} : (memref<32xi32, 1>) + %7 = air.channel.put async [%5] @channel_1[%c0_2, %c1_2] (%results_1[%c16] [%c16] []) {id = 6 : i32} : (memref<32xi32, 1>) + + %8 = air.herd @herd_0 async [%4] tile (%arg8, %arg9) in (%arg10=%c1_2, %arg11=%c2_2) attributes {id = 7 : i32} { + %9 = air.wait_all async + %async_token_2, %results_2 = air.execute -> (memref<16xi32, 2>) { + %alloc2 = memref.alloc() : memref<16xi32, 2> + air.execute_terminator %alloc2 : memref<16xi32, 2> + } + %10 = air.channel.get async [%async_token_2, %9] @channel_1[%arg8, %arg9] (%results_2[] [] []) {id = 8 : i32} : (memref<16xi32, 2>) + %async_token_3 = air.execute [%10] { + memref.dealloc %results_2 : memref<16xi32, 2> + } + air.herd_terminator + } + air.segment_terminator + } + air.launch_terminator + } + return + } +} diff --git a/mlir/test/Conversion/AIRToAIE/air_channel_to_objectfifo_join.mlir b/mlir/test/Conversion/AIRToAIE/air_channel_to_objectfifo_join.mlir new file mode 100755 index 000000000..ba04ae13c --- /dev/null +++ b/mlir/test/Conversion/AIRToAIE/air_channel_to_objectfifo_join.mlir @@ -0,0 +1,82 @@ +//===- air_channel_to_objectfifo_join.mlir ----------------------*- MLIR -*-===// +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. +// SPDX-License-Identifier: MIT +// +//===----------------------------------------------------------------------===// + +// RUN: air-opt %s -air-place-herds='num-rows=2 num-cols=2 row-anchor=3 col-anchor=5' --air-to-aie='use-objectfifo=true device=xcve2802' --canonicalize | FileCheck %s + +// CHECK-LABEL: aie.device(xcve2802) { +// CHECK: %[[VAL_0:.*]] = aie.tile(2, 0) +// CHECK: %[[VAL_1:.*]] = aie.tile(1, 1) +// CHECK: %[[VAL_2:.*]] = aie.tile(5, 3) +// CHECK: %[[VAL_3:.*]] = aie.tile(5, 4) +// CHECK: aie.objectfifo @air_channel_0(%[[VAL_1]], {%[[VAL_0]]}, 1 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @air_channel_3(%[[VAL_3]], {%[[VAL_1]]}, 1 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @air_channel_2(%[[VAL_2]], {%[[VAL_1]]}, 1 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo.link [@air_channel_2, @air_channel_3] -> [@air_channel_0]() +// CHECK: %[[VAL_4:.*]] = aie.core(%[[VAL_3]]) { +// CHECK: %[[VAL_5:.*]] = aie.objectfifo.acquire @air_channel_3(Produce, 1) : !aie.objectfifosubview> +// CHECK: %[[VAL_6:.*]] = aie.objectfifo.subview.access %[[VAL_5]][0] : !aie.objectfifosubview> -> memref<16xi32> +// CHECK: aie.objectfifo.release @air_channel_3(Produce, 1) +// CHECK: aie.end +// CHECK: } {elf_file = "segment_0_core_5_4.elf"} +// CHECK: %[[VAL_5:.*]] = aie.core(%[[VAL_2]]) { +// CHECK: %[[VAL_6:.*]] = aie.objectfifo.acquire @air_channel_2(Produce, 1) : !aie.objectfifosubview> +// CHECK: %[[VAL_7:.*]] = aie.objectfifo.subview.access %[[VAL_6]][0] : !aie.objectfifosubview> -> memref<16xi32> +// CHECK: aie.objectfifo.release @air_channel_2(Produce, 1) +// CHECK: aie.end +// CHECK: } {elf_file = "segment_0_core_5_3.elf"} +// CHECK: } + +module { + air.channel @channel_0 [1, 1] + air.channel @channel_1 [1, 2] + func.func @L1toL2(%arg0: memref<32xi32>) { + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + air.launch (%arg1, %arg2) in (%arg3=%c1, %arg4=%c2) args(%arg5=%arg0) : memref<32xi32> attributes {id = 1 : i32} { + %async_token, %results = air.execute -> (memref<32xi32>) { + %alloc = memref.alloc() {alignment = 32 : i64} : memref<32xi32> + air.execute_terminator %alloc : memref<32xi32> + } + %0 = air.wait_all async + %1 = air.channel.get async [%async_token, %0] @channel_0[] (%results[] [] []) {id = 2 : i32} : (memref<32xi32>) + + %2 = air.segment async args(%arg6=%arg1, %arg7=%arg2) : index, index attributes {id = 3 : i32} { + %c0_2 = arith.constant 0 : index + %c1_2 = arith.constant 1 : index + %c2_2 = arith.constant 2 : index + %c16 = arith.constant 16 : index + %c32 = arith.constant 32 : index + + %3 = air.herd @herd_0 async tile (%arg8, %arg9) in (%arg10=%c1_2, %arg11=%c2_2) attributes {id = 4 : i32} { + %9 = air.wait_all async + %async_token_2, %results_2 = air.execute -> (memref<16xi32, 2>) { + %alloc2 = memref.alloc() : memref<16xi32, 2> + air.execute_terminator %alloc2 : memref<16xi32, 2> + } + %10 = air.channel.put async [%async_token_2, %9] @channel_1[%arg8, %arg9] (%results_2[] [] []) {id = 5 : i32} : (memref<16xi32, 2>) + %async_token_3 = air.execute [%10] { + memref.dealloc %results_2 : memref<16xi32, 2> + } + air.herd_terminator + } + %4 = air.wait_all async + %async_token_1, %results_1 = air.execute -> (memref<32xi32, 1>) { + %alloc1 = memref.alloc() : memref<32xi32, 1> + air.execute_terminator %alloc1 : memref<32xi32, 1> + } + %5 = air.channel.get async [%4, %async_token_1] @channel_1[%c0_2, %c0_2] (%results_1[%c0_2] [%c16] []) {id = 6 : i32} : (memref<32xi32, 1>) + %6 = air.channel.get async [%4, %async_token_1] @channel_1[%c0_2, %c1_2] (%results_1[%c16] [%c16] []) {id = 7 : i32} : (memref<32xi32, 1>) + %7 = air.wait_all async + %8 = air.channel.put async [%7] @channel_0[] (%results_1[] [] []) {id = 8 : i32} : (memref<32xi32, 1>) + air.segment_terminator + } + air.launch_terminator + } + return + } +}