Skip to content

Commit 6d86140

Browse files
erwei-xilinx and claude authored
Add shim DMA BD reuse tests for >16 task sequences (#3026)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5b5376b commit 6d86140

File tree

4 files changed

+545
-0
lines changed

4 files changed

+545
-0
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
//
2+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
3+
// See https://llvm.org/LICENSE.txt for license information.
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
//
6+
// (c) Copyright 2026 Advanced Micro Devices, Inc.
7+
8+
// RUN: aie-opt --aie-assign-runtime-sequence-bd-ids %s | FileCheck %s
9+
10+
// Tests that >16 tasks on one shim tile succeed when dma_free_task
11+
// recycles BD IDs between batches. Two host buffers alternate on
12+
// the same MM2S channel (simulates RoPE LUT + V interleaving).
13+
//
14+
// Batch 1: 10 tasks → BD IDs 0-9.
15+
// Free all → IDs 0-9 available.
16+
// Batch 2: 10 tasks → reuses BD IDs 0-9.
17+
// Total: 20 tasks on one tile (more than the 16-task limit that applies without BD ID reuse).
18+
19+
module {
20+
// Single npu2 device; everything under test lives in the runtime sequence below.
aie.device(npu2) {
21+
// The one shim tile (column 0, row 0) that every task in this test targets.
%tile_0_0 = aie.tile(0, 0)
22+
// Two host buffers of 40960 bf16 elements each. The FileCheck patterns refer
// to them as %arg0 (%buf_a) and %arg1 (%buf_b).
23+
aie.runtime_sequence(%buf_a: memref<40960xbf16>, %buf_b: memref<40960xbf16>) {
24+
25+
// ===== Batch 1: 10 tasks, BD IDs 0-9 =====
26+
// Every task is a single 4096-element BD on MM2S channel 0 of %tile_0_0.
// %buf_a and %buf_b alternate, and the offset advances by 4096 after each
// a/b pair (0, 4096, 8192, 12288, 16384 in this batch).
27+
// CHECK: aie.dma_bd(%arg0 : memref<40960xbf16>, 0, 4096) {bd_id = 0 : i32}
28+
%t0 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
29+
aie.dma_bd(%buf_a : memref<40960xbf16>, 0, 4096)
30+
aie.end
31+
}
32+
aiex.dma_start_task(%t0)
33+
34+
// CHECK: aie.dma_bd(%arg1 : memref<40960xbf16>, 0, 4096) {bd_id = 1 : i32}
35+
%t1 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
36+
aie.dma_bd(%buf_b : memref<40960xbf16>, 0, 4096)
37+
aie.end
38+
}
39+
aiex.dma_start_task(%t1)
40+
// From here on the FileCheck directives only pin the assigned bd_id; the
// operand list of each dma_bd is matched by a wildcard.
41+
// CHECK: aie.dma_bd({{.*}} {bd_id = 2 : i32}
42+
%t2 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
43+
aie.dma_bd(%buf_a : memref<40960xbf16>, 4096, 4096)
44+
aie.end
45+
}
46+
aiex.dma_start_task(%t2)
47+
48+
// CHECK: aie.dma_bd({{.*}} {bd_id = 3 : i32}
49+
%t3 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
50+
aie.dma_bd(%buf_b : memref<40960xbf16>, 4096, 4096)
51+
aie.end
52+
}
53+
aiex.dma_start_task(%t3)
54+
55+
// CHECK: aie.dma_bd({{.*}} {bd_id = 4 : i32}
56+
%t4 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
57+
aie.dma_bd(%buf_a : memref<40960xbf16>, 8192, 4096)
58+
aie.end
59+
}
60+
aiex.dma_start_task(%t4)
61+
62+
// CHECK: aie.dma_bd({{.*}} {bd_id = 5 : i32}
63+
%t5 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
64+
aie.dma_bd(%buf_b : memref<40960xbf16>, 8192, 4096)
65+
aie.end
66+
}
67+
aiex.dma_start_task(%t5)
68+
69+
// CHECK: aie.dma_bd({{.*}} {bd_id = 6 : i32}
70+
%t6 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
71+
aie.dma_bd(%buf_a : memref<40960xbf16>, 12288, 4096)
72+
aie.end
73+
}
74+
aiex.dma_start_task(%t6)
75+
76+
// CHECK: aie.dma_bd({{.*}} {bd_id = 7 : i32}
77+
%t7 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
78+
aie.dma_bd(%buf_b : memref<40960xbf16>, 12288, 4096)
79+
aie.end
80+
}
81+
aiex.dma_start_task(%t7)
82+
83+
// CHECK: aie.dma_bd({{.*}} {bd_id = 8 : i32}
84+
%t8 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
85+
aie.dma_bd(%buf_a : memref<40960xbf16>, 16384, 4096)
86+
aie.end
87+
}
88+
aiex.dma_start_task(%t8)
89+
90+
// CHECK: aie.dma_bd({{.*}} {bd_id = 9 : i32}
91+
%t9 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
92+
aie.dma_bd(%buf_b : memref<40960xbf16>, 16384, 4096)
93+
aie.end
94+
}
95+
aiex.dma_start_task(%t9)
96+
97+
// Await only the last task of batch 1; per the test author, tasks on this
// channel run sequentially, so this implies %t0-%t8 have completed as well.
98+
// Then free the earlier tasks one by one so their BD IDs can be handed out again.
99+
aiex.dma_await_task(%t9)
100+
aiex.dma_free_task(%t0)
101+
aiex.dma_free_task(%t1)
102+
aiex.dma_free_task(%t2)
103+
aiex.dma_free_task(%t3)
104+
aiex.dma_free_task(%t4)
105+
aiex.dma_free_task(%t5)
106+
aiex.dma_free_task(%t6)
107+
aiex.dma_free_task(%t7)
108+
aiex.dma_free_task(%t8)
109+
// NOTE(review): %t9 is awaited above but never passed to dma_free_task, yet
// batch 2 expects bd_id = 9 to be reused. This presumably relies on the pass
// releasing a task's BDs when it is awaited -- confirm against the pass.
110+
// ===== Batch 2: 10 more tasks, reuses BD IDs 0-9 =====
111+
// Offsets continue from 20480 up to 36864, so the two batches together cover
// all 40960 elements of each buffer in 4096-element chunks.
112+
// CHECK: aie.dma_bd(%arg0 : memref<40960xbf16>, 20480, 4096) {bd_id = 0 : i32}
113+
%t10 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
114+
aie.dma_bd(%buf_a : memref<40960xbf16>, 20480, 4096)
115+
aie.end
116+
}
117+
aiex.dma_start_task(%t10)
118+
119+
// CHECK: aie.dma_bd(%arg1 : memref<40960xbf16>, 20480, 4096) {bd_id = 1 : i32}
120+
%t11 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
121+
aie.dma_bd(%buf_b : memref<40960xbf16>, 20480, 4096)
122+
aie.end
123+
}
124+
aiex.dma_start_task(%t11)
125+
// As in batch 1, the remaining directives check only the bd_id attribute.
126+
// CHECK: aie.dma_bd({{.*}} {bd_id = 2 : i32}
127+
%t12 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
128+
aie.dma_bd(%buf_a : memref<40960xbf16>, 24576, 4096)
129+
aie.end
130+
}
131+
aiex.dma_start_task(%t12)
132+
133+
// CHECK: aie.dma_bd({{.*}} {bd_id = 3 : i32}
134+
%t13 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
135+
aie.dma_bd(%buf_b : memref<40960xbf16>, 24576, 4096)
136+
aie.end
137+
}
138+
aiex.dma_start_task(%t13)
139+
140+
// CHECK: aie.dma_bd({{.*}} {bd_id = 4 : i32}
141+
%t14 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
142+
aie.dma_bd(%buf_a : memref<40960xbf16>, 28672, 4096)
143+
aie.end
144+
}
145+
aiex.dma_start_task(%t14)
146+
147+
// CHECK: aie.dma_bd({{.*}} {bd_id = 5 : i32}
148+
%t15 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
149+
aie.dma_bd(%buf_b : memref<40960xbf16>, 28672, 4096)
150+
aie.end
151+
}
152+
aiex.dma_start_task(%t15)
153+
154+
// CHECK: aie.dma_bd({{.*}} {bd_id = 6 : i32}
155+
%t16 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
156+
aie.dma_bd(%buf_a : memref<40960xbf16>, 32768, 4096)
157+
aie.end
158+
}
159+
aiex.dma_start_task(%t16)
160+
161+
// CHECK: aie.dma_bd({{.*}} {bd_id = 7 : i32}
162+
%t17 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
163+
aie.dma_bd(%buf_b : memref<40960xbf16>, 32768, 4096)
164+
aie.end
165+
}
166+
aiex.dma_start_task(%t17)
167+
168+
// CHECK: aie.dma_bd({{.*}} {bd_id = 8 : i32}
169+
%t18 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
170+
aie.dma_bd(%buf_a : memref<40960xbf16>, 36864, 4096)
171+
aie.end
172+
}
173+
aiex.dma_start_task(%t18)
174+
175+
// CHECK: aie.dma_bd({{.*}} {bd_id = 9 : i32}
176+
%t19 = aiex.dma_configure_task(%tile_0_0, MM2S, 0) {
177+
aie.dma_bd(%buf_b : memref<40960xbf16>, 36864, 4096)
178+
aie.end
179+
}
180+
aiex.dma_start_task(%t19)
181+
// Await the final task of batch 2 before the runtime sequence ends.
182+
aiex.dma_await_task(%t19)
183+
}
184+
}
185+
}

0 commit comments

Comments
 (0)