Skip to content

Commit 4e3a9d0

Browse files
AndraBisca, pvasireddy-amd, jgmelber, abisca, and github-actions[bot]
authored
Access buffer from adjacent memtile's DMA (#1927)
Co-authored-by: Pranathi Vasireddy <[email protected]>
Co-authored-by: Joseph Melber <[email protected]>
Co-authored-by: AndraBisca <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 7e01df8 commit 4e3a9d0

File tree

8 files changed

+862
-5
lines changed

8 files changed

+862
-5
lines changed

include/aie/Targets/AIERT.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,12 @@ static_assert(XAIE_OK == 0);
182182
#define NUM_LOCKS 16
183183
#define EVEN_BD_NUM_START 0
184184
#define ODD_BD_NUM_START 24
185+
#define MEM_TILE_LOCK_ID_INCR_WEST 0
185186
#define MEM_TILE_LOCK_ID_INCR 64
187+
#define MEM_TILE_LOCK_ID_INCR_EAST 128
188+
#define BASE_ADDR_A_INCR_WEST 0x00000
186189
#define BASE_ADDR_A_INCR 0x80000
190+
#define BASE_ADDR_A_INCR_EAST 0x100000
187191

188192
namespace xilinx::AIE {
189193
struct AIERTControl {

lib/Targets/AIERT.cpp

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,13 @@ LogicalResult AIERTControl::configureLocksInBdBlock(XAie_DmaDesc &dmaTileBd,
127127
bool acqEn = false;
128128

129129
// switch (lock->getAc)
130+
LockOp lock;
130131
for (auto op : block.getOps<UseLockOp>()) {
131132
// Only dyn_cast if you are going to check if it was of the type
132133
// expected; if you aren't checking use cast instead as it will at
133134
// least assert in debug mode with an easier to understand error than
134135
// dereferencing.
135-
LockOp lock = cast<LockOp>(op.getLock().getDefiningOp());
136+
lock = cast<LockOp>(op.getLock().getDefiningOp());
136137
switch (op.getAction()) {
137138
case LockAction::Acquire:
138139
case LockAction::AcquireGreaterEqual:
@@ -153,10 +154,26 @@ LogicalResult AIERTControl::configureLocksInBdBlock(XAie_DmaDesc &dmaTileBd,
153154
"expected both use_lock(acquire) and use_lock(release) with bd");
154155

155156
if (targetModel.isMemTile(tileLoc.Col, tileLoc.Row)) {
157+
// check if buffer is allocated on the same memtile, the west, or the east
158+
// one
159+
int increaseValue = 0;
160+
auto lockRow = lock.rowIndex();
161+
auto lockCol = lock.colIndex();
162+
bool isWestLock =
163+
targetModel.isWest(tileLoc.Col, tileLoc.Row, lockCol, lockRow);
164+
bool isEastLock =
165+
targetModel.isEast(tileLoc.Col, tileLoc.Row, lockCol, lockRow);
166+
if (isWestLock) {
167+
increaseValue = MEM_TILE_LOCK_ID_INCR_WEST;
168+
} else if (isEastLock) {
169+
increaseValue = MEM_TILE_LOCK_ID_INCR_EAST;
170+
} else {
171+
increaseValue = MEM_TILE_LOCK_ID_INCR;
172+
}
156173
if (acqLockId)
157-
acqLockId.value() += MEM_TILE_LOCK_ID_INCR;
174+
acqLockId.value() += increaseValue;
158175
if (relLockId)
159-
relLockId.value() += MEM_TILE_LOCK_ID_INCR;
176+
relLockId.value() += increaseValue;
160177
}
161178

162179
// no RelEn in the arch spec even though the API requires you to set it?
@@ -207,8 +224,23 @@ LogicalResult AIERTControl::configureBdInBlock(XAie_DmaDesc &dmaTileBd,
207224
if (!bufferOp.getAddress())
208225
return bufferOp.emitError("buffer must have address assigned");
209226
baseAddr = bufferOp.getAddress().value();
210-
if (targetModel.isMemTile(tileLoc.Col, tileLoc.Row))
211-
baseAddr += BASE_ADDR_A_INCR;
227+
if (targetModel.isMemTile(tileLoc.Col, tileLoc.Row)) {
228+
// check if buffer is allocated on the same memtile, the west, or the east
229+
// one
230+
auto bufferRow = bufferOp.getTileOp().getRow();
231+
auto bufferCol = bufferOp.getTileOp().getCol();
232+
bool isWestBuff =
233+
targetModel.isWest(tileLoc.Col, tileLoc.Row, bufferCol, bufferRow);
234+
bool isEastBuff =
235+
targetModel.isEast(tileLoc.Col, tileLoc.Row, bufferCol, bufferRow);
236+
if (isWestBuff) {
237+
baseAddr += BASE_ADDR_A_INCR_WEST;
238+
} else if (isEastBuff) {
239+
baseAddr += BASE_ADDR_A_INCR_EAST;
240+
} else {
241+
baseAddr += BASE_ADDR_A_INCR;
242+
}
243+
}
212244
}
213245

214246
std::optional<llvm::ArrayRef<BDDimLayoutAttr>> dims = bdOp.getDimensions();
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
//===- aie.mlir ------------------------------------------------*- MLIR -*-===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
module {
12+
aie.device(npu1_4col) {
13+
memref.global "public" @out_cons : memref<16xi32>
14+
memref.global "public" @out : memref<16xi32>
15+
memref.global "public" @in2_mem_cons : memref<256xi32>
16+
memref.global "public" @in2_mem : memref<256xi32>
17+
memref.global "public" @in1_cons : memref<16xi32>
18+
memref.global "public" @in1 : memref<16xi32>
19+
%tile_0_0 = aie.tile(0, 0)
20+
%tile_0_1 = aie.tile(0, 1)
21+
%tile_1_1 = aie.tile(1, 1)
22+
%tile_2_1 = aie.tile(2, 1)
23+
%tile_0_2 = aie.tile(0, 2)
24+
%out_buff_0 = aie.buffer(%tile_0_2) {sym_name = "out_buff_0"} : memref<16xi32>
25+
%out_buff_1 = aie.buffer(%tile_0_2) {sym_name = "out_buff_1"} : memref<16xi32>
26+
%out_prod_lock = aie.lock(%tile_0_2, 4) {init = 2 : i32, sym_name = "out_prod_lock"}
27+
%out_cons_lock = aie.lock(%tile_0_2, 5) {init = 0 : i32, sym_name = "out_cons_lock"}
28+
%in2_mem_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "in2_mem_cons_buff_0"} : memref<256xi32>
29+
%in2_mem_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "in2_mem_cons_buff_1"} : memref<256xi32>
30+
%in2_mem_cons_prod_lock = aie.lock(%tile_0_2, 2) {init = 2 : i32, sym_name = "in2_mem_cons_prod_lock"}
31+
%in2_mem_cons_cons_lock = aie.lock(%tile_0_2, 3) {init = 0 : i32, sym_name = "in2_mem_cons_cons_lock"}
32+
%in2_mem_buff_0 = aie.buffer(%tile_0_1) {sym_name = "in2_mem_buff_0"} : memref<64xi32> = dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]>
33+
%in2_mem_buff_1 = aie.buffer(%tile_1_1) {sym_name = "in2_mem_buff_1"} : memref<64xi32> = dense<[64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127]>
34+
%in2_mem_buff_2 = aie.buffer(%tile_2_1) {sym_name = "in2_mem_buff_2"} : memref<64xi32> = dense<[128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191]>
35+
%in2_mem_buff_3 = aie.buffer(%tile_2_1) {sym_name = "in2_mem_buff_3"} : memref<64xi32> = dense<[192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]>
36+
%in2_mem_prod_lock = aie.lock(%tile_0_1, 0) {init = 0 : i32, sym_name = "in2_mem_prod_lock"}
37+
%in2_mem_cons_lock = aie.lock(%tile_0_1, 1) {init = 1 : i32, sym_name = "in2_mem_cons_lock"}
38+
%in3_mem_prod_lock = aie.lock(%tile_1_1, 0) {init = 0 : i32, sym_name = "in3_mem_prod_lock"}
39+
%in3_mem_cons_lock = aie.lock(%tile_1_1, 1) {init = 1 : i32, sym_name = "in3_mem_cons_lock"}
40+
%in4_mem_prod_lock = aie.lock(%tile_2_1, 0) {init = 0 : i32, sym_name = "in4_mem_prod_lock"}
41+
%in4_mem_cons_lock = aie.lock(%tile_2_1, 1) {init = 2 : i32, sym_name = "in4_mem_cons_lock"}
42+
%in1_cons_buff_0 = aie.buffer(%tile_0_2) {sym_name = "in1_cons_buff_0"} : memref<16xi32>
43+
%in1_cons_buff_1 = aie.buffer(%tile_0_2) {sym_name = "in1_cons_buff_1"} : memref<16xi32>
44+
%in1_cons_prod_lock = aie.lock(%tile_0_2, 0) {init = 2 : i32, sym_name = "in1_cons_prod_lock"}
45+
%in1_cons_cons_lock = aie.lock(%tile_0_2, 1) {init = 0 : i32, sym_name = "in1_cons_cons_lock"}
46+
aie.flow(%tile_0_0, DMA : 0, %tile_0_2, DMA : 0)
47+
aie.flow(%tile_1_1, DMA : 0, %tile_0_2, DMA : 1)
48+
aie.flow(%tile_0_2, DMA : 0, %tile_0_0, DMA : 0)
49+
%core_0_2 = aie.core(%tile_0_2) {
50+
%c16 = arith.constant 16 : index
51+
%c0 = arith.constant 0 : index
52+
%c1 = arith.constant 1 : index
53+
%c9223372036854775806 = arith.constant 9223372036854775806 : index
54+
%c2 = arith.constant 2 : index
55+
scf.for %arg0 = %c0 to %c9223372036854775806 step %c2 {
56+
aie.use_lock(%in2_mem_cons_cons_lock, AcquireGreaterEqual, 1)
57+
scf.for %arg1 = %c0 to %c16 step %c2 {
58+
aie.use_lock(%in1_cons_cons_lock, AcquireGreaterEqual, 1)
59+
aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1)
60+
scf.for %arg2 = %c0 to %c16 step %c1 {
61+
%1 = memref.load %in1_cons_buff_0[%arg2] : memref<16xi32>
62+
%2 = arith.muli %arg1, %c16 : index
63+
%3 = arith.addi %arg2, %2 : index
64+
%4 = memref.load %in2_mem_cons_buff_0[%3] : memref<256xi32>
65+
%5 = arith.addi %1, %4 : i32
66+
memref.store %5, %out_buff_0[%arg2] : memref<16xi32>
67+
}
68+
aie.use_lock(%in1_cons_prod_lock, Release, 1)
69+
aie.use_lock(%out_cons_lock, Release, 1)
70+
%0 = arith.addi %arg1, %c1 : index
71+
aie.use_lock(%in1_cons_cons_lock, AcquireGreaterEqual, 1)
72+
aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1)
73+
scf.for %arg2 = %c0 to %c16 step %c2 {
74+
%1 = memref.load %in1_cons_buff_1[%arg2] : memref<16xi32>
75+
%2 = arith.muli %0, %c16 : index
76+
%3 = arith.addi %arg2, %2 : index
77+
%4 = memref.load %in2_mem_cons_buff_0[%3] : memref<256xi32>
78+
%5 = arith.addi %1, %4 : i32
79+
memref.store %5, %out_buff_1[%arg2] : memref<16xi32>
80+
%6 = arith.addi %arg2, %c1 : index
81+
%7 = memref.load %in1_cons_buff_1[%6] : memref<16xi32>
82+
%8 = arith.muli %0, %c16 : index
83+
%9 = arith.addi %6, %8 : index
84+
%10 = memref.load %in2_mem_cons_buff_0[%9] : memref<256xi32>
85+
%11 = arith.addi %7, %10 : i32
86+
memref.store %11, %out_buff_1[%6] : memref<16xi32>
87+
}
88+
aie.use_lock(%in1_cons_prod_lock, Release, 1)
89+
aie.use_lock(%out_cons_lock, Release, 1)
90+
}
91+
aie.use_lock(%in2_mem_cons_prod_lock, Release, 1)
92+
aie.use_lock(%in2_mem_cons_cons_lock, AcquireGreaterEqual, 1)
93+
scf.for %arg1 = %c0 to %c16 step %c2 {
94+
aie.use_lock(%in1_cons_cons_lock, AcquireGreaterEqual, 1)
95+
aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1)
96+
scf.for %arg2 = %c0 to %c16 step %c1 {
97+
%1 = memref.load %in1_cons_buff_0[%arg2] : memref<16xi32>
98+
%2 = arith.muli %arg1, %c16 : index
99+
%3 = arith.addi %arg2, %2 : index
100+
%4 = memref.load %in2_mem_cons_buff_1[%3] : memref<256xi32>
101+
%5 = arith.addi %1, %4 : i32
102+
memref.store %5, %out_buff_0[%arg2] : memref<16xi32>
103+
}
104+
aie.use_lock(%in1_cons_prod_lock, Release, 1)
105+
aie.use_lock(%out_cons_lock, Release, 1)
106+
%0 = arith.addi %arg1, %c1 : index
107+
aie.use_lock(%in1_cons_cons_lock, AcquireGreaterEqual, 1)
108+
aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1)
109+
scf.for %arg2 = %c0 to %c16 step %c2 {
110+
%1 = memref.load %in1_cons_buff_1[%arg2] : memref<16xi32>
111+
%2 = arith.muli %0, %c16 : index
112+
%3 = arith.addi %arg2, %2 : index
113+
%4 = memref.load %in2_mem_cons_buff_1[%3] : memref<256xi32>
114+
%5 = arith.addi %1, %4 : i32
115+
memref.store %5, %out_buff_1[%arg2] : memref<16xi32>
116+
%6 = arith.addi %arg2, %c1 : index
117+
%7 = memref.load %in1_cons_buff_1[%6] : memref<16xi32>
118+
%8 = arith.muli %0, %c16 : index
119+
%9 = arith.addi %6, %8 : index
120+
%10 = memref.load %in2_mem_cons_buff_1[%9] : memref<256xi32>
121+
%11 = arith.addi %7, %10 : i32
122+
memref.store %11, %out_buff_1[%6] : memref<16xi32>
123+
}
124+
aie.use_lock(%in1_cons_prod_lock, Release, 1)
125+
aie.use_lock(%out_cons_lock, Release, 1)
126+
}
127+
aie.use_lock(%in2_mem_cons_prod_lock, Release, 1)
128+
}
129+
aie.use_lock(%in2_mem_cons_cons_lock, AcquireGreaterEqual, 1)
130+
scf.for %arg0 = %c0 to %c16 step %c2 {
131+
aie.use_lock(%in1_cons_cons_lock, AcquireGreaterEqual, 1)
132+
aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1)
133+
scf.for %arg1 = %c0 to %c16 step %c1 {
134+
%1 = memref.load %in1_cons_buff_0[%arg1] : memref<16xi32>
135+
%2 = arith.muli %arg0, %c16 : index
136+
%3 = arith.addi %arg1, %2 : index
137+
%4 = memref.load %in2_mem_cons_buff_0[%3] : memref<256xi32>
138+
%5 = arith.addi %1, %4 : i32
139+
memref.store %5, %out_buff_0[%arg1] : memref<16xi32>
140+
}
141+
aie.use_lock(%in1_cons_prod_lock, Release, 1)
142+
aie.use_lock(%out_cons_lock, Release, 1)
143+
%0 = arith.addi %arg0, %c1 : index
144+
aie.use_lock(%in1_cons_cons_lock, AcquireGreaterEqual, 1)
145+
aie.use_lock(%out_prod_lock, AcquireGreaterEqual, 1)
146+
scf.for %arg1 = %c0 to %c16 step %c2 {
147+
%1 = memref.load %in1_cons_buff_1[%arg1] : memref<16xi32>
148+
%2 = arith.muli %0, %c16 : index
149+
%3 = arith.addi %arg1, %2 : index
150+
%4 = memref.load %in2_mem_cons_buff_0[%3] : memref<256xi32>
151+
%5 = arith.addi %1, %4 : i32
152+
memref.store %5, %out_buff_1[%arg1] : memref<16xi32>
153+
%6 = arith.addi %arg1, %c1 : index
154+
%7 = memref.load %in1_cons_buff_1[%6] : memref<16xi32>
155+
%8 = arith.muli %0, %c16 : index
156+
%9 = arith.addi %6, %8 : index
157+
%10 = memref.load %in2_mem_cons_buff_0[%9] : memref<256xi32>
158+
%11 = arith.addi %7, %10 : i32
159+
memref.store %11, %out_buff_1[%6] : memref<16xi32>
160+
}
161+
aie.use_lock(%in1_cons_prod_lock, Release, 1)
162+
aie.use_lock(%out_cons_lock, Release, 1)
163+
}
164+
aie.use_lock(%in2_mem_cons_prod_lock, Release, 1)
165+
aie.end
166+
}
167+
aie.shim_dma_allocation @in1(MM2S, 0, 0)
168+
aiex.runtime_sequence(%arg0: memref<256xi32>, %arg1: memref<256xi32>, %arg2: memref<256xi32>) {
169+
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 256][0, 0, 0, 1]) {id = 1 : i64, metadata = @in1} : memref<256xi32>
170+
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 256][0, 0, 0, 1]) {id = 0 : i64, metadata = @out} : memref<256xi32>
171+
aiex.npu.dma_wait {symbol = @out}
172+
}
173+
%mem_0_2 = aie.mem(%tile_0_2) {
174+
%0 = aie.dma_start(S2MM, 0, ^bb1, ^bb3)
175+
^bb1: // 2 preds: ^bb0, ^bb2
176+
aie.use_lock(%in1_cons_prod_lock, AcquireGreaterEqual, 1)
177+
aie.dma_bd(%in1_cons_buff_0 : memref<16xi32>, 0, 16)
178+
aie.use_lock(%in1_cons_cons_lock, Release, 1)
179+
aie.next_bd ^bb2
180+
^bb2: // pred: ^bb1
181+
aie.use_lock(%in1_cons_prod_lock, AcquireGreaterEqual, 1)
182+
aie.dma_bd(%in1_cons_buff_1 : memref<16xi32>, 0, 16)
183+
aie.use_lock(%in1_cons_cons_lock, Release, 1)
184+
aie.next_bd ^bb1
185+
^bb3: // pred: ^bb0
186+
%1 = aie.dma_start(S2MM, 1, ^bb4, ^bb6)
187+
^bb4: // 2 preds: ^bb3, ^bb5
188+
aie.use_lock(%in2_mem_cons_prod_lock, AcquireGreaterEqual, 1)
189+
aie.dma_bd(%in2_mem_cons_buff_0 : memref<256xi32>, 0, 256)
190+
aie.use_lock(%in2_mem_cons_cons_lock, Release, 1)
191+
aie.next_bd ^bb5
192+
^bb5: // pred: ^bb4
193+
aie.use_lock(%in2_mem_cons_prod_lock, AcquireGreaterEqual, 1)
194+
aie.dma_bd(%in2_mem_cons_buff_1 : memref<256xi32>, 0, 256)
195+
aie.use_lock(%in2_mem_cons_cons_lock, Release, 1)
196+
aie.next_bd ^bb4
197+
^bb6: // pred: ^bb3
198+
%2 = aie.dma_start(MM2S, 0, ^bb7, ^bb9)
199+
^bb7: // 2 preds: ^bb6, ^bb8
200+
aie.use_lock(%out_cons_lock, AcquireGreaterEqual, 1)
201+
aie.dma_bd(%out_buff_0 : memref<16xi32>, 0, 16)
202+
aie.use_lock(%out_prod_lock, Release, 1)
203+
aie.next_bd ^bb8
204+
^bb8: // pred: ^bb7
205+
aie.use_lock(%out_cons_lock, AcquireGreaterEqual, 1)
206+
aie.dma_bd(%out_buff_1 : memref<16xi32>, 0, 16)
207+
aie.use_lock(%out_prod_lock, Release, 1)
208+
aie.next_bd ^bb7
209+
^bb9: // pred: ^bb6
210+
aie.end
211+
}
212+
aie.shim_dma_allocation @out(S2MM, 0, 0)
213+
%memtile_dma_1_1 = aie.memtile_dma(%tile_1_1) {
214+
%0 = aie.dma_start(MM2S, 0, ^bb1, ^bb5)
215+
^bb1:
216+
aie.use_lock(%in2_mem_cons_lock, AcquireGreaterEqual, 1)
217+
aie.dma_bd(%in2_mem_buff_0 : memref<64xi32>, 0, 64)
218+
aie.use_lock(%in2_mem_prod_lock, Release, 1)
219+
aie.next_bd ^bb2
220+
^bb2:
221+
aie.use_lock(%in3_mem_cons_lock, AcquireGreaterEqual, 1)
222+
aie.dma_bd(%in2_mem_buff_1 : memref<64xi32>, 0, 64)
223+
aie.use_lock(%in3_mem_prod_lock, Release, 1)
224+
aie.next_bd ^bb3
225+
^bb3:
226+
aie.use_lock(%in4_mem_cons_lock, AcquireGreaterEqual, 1)
227+
aie.dma_bd(%in2_mem_buff_2 : memref<64xi32>, 0, 64)
228+
aie.use_lock(%in4_mem_prod_lock, Release, 1)
229+
aie.next_bd ^bb4
230+
^bb4:
231+
aie.use_lock(%in4_mem_cons_lock, AcquireGreaterEqual, 1)
232+
aie.dma_bd(%in2_mem_buff_3 : memref<64xi32>, 0, 64)
233+
aie.use_lock(%in4_mem_prod_lock, Release, 1)
234+
aie.next_bd ^bb5
235+
^bb5:
236+
aie.end
237+
}
238+
}
239+
}
240+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// (c) Copyright 2024 Advanced Micro Devices, Inc.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
//
4+
// REQUIRES: ryzen_ai
5+
//
6+
// RUN: %python aiecc.py --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --alloc-scheme=basic-sequential --xclbin-name=aie.xclbin --npu-insts-name=insts.txt %S/aie.mlir
7+
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags
8+
// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
9+
// CHECK: PASS!
10+

0 commit comments

Comments (0)