Skip to content

Commit 4c71d38

Browse files
AndraBiscaabiscagithub-actions[bot]
authored
[Object FIFO] Fix error where dmaBd length is computed incorrectly when there are dims (#2026)
Co-authored-by: AndraBisca <[email protected]> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 44bc926 commit 4c71d38

File tree

3 files changed

+441
-7
lines changed

3 files changed

+441
-7
lines changed

lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -844,13 +844,11 @@ struct AIEObjectFifoStatefulTransformPass
844844
}
845845
} else {
846846
if (target != op) {
847-
if (dims.getValue().empty()) {
848-
auto targetFifo =
849-
llvm::cast<AIEObjectFifoType>(target.getElemType());
850-
auto targetElemType =
851-
llvm::cast<MemRefType>(targetFifo.getElementType());
852-
lenOut = targetElemType.getNumElements();
853-
}
847+
auto targetFifo =
848+
llvm::cast<AIEObjectFifoType>(target.getElemType());
849+
auto targetElemType =
850+
llvm::cast<MemRefType>(targetFifo.getElementType());
851+
lenOut = targetElemType.getNumElements();
854852
}
855853
}
856854

test/npu-xrt/dma_complex_dims/aie2.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
# dma_complex_dims/aie2.py -*- Python -*-
2+
#
3+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
8+
9+
# REQUIRES: ryzen_ai, peano
10+
#
11+
# RUN: %python %S/aie2.py --m 8 --k 5 --K 20 --r 4 --s 5 > ./aie2.mlir
12+
# RUN: %python aiecc.py --no-aiesim --no-xchesscc --aie-generate-cdo --aie-generate-npu --aie-generate-xclbin --no-compile-host --xclbin-name=final.xclbin --npu-insts-name=insts.txt ./aie2.mlir
13+
# RUN: clang %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags
14+
# RUN: %run_on_npu ./test.exe -x final.xclbin -i insts.txt -k MLIR_AIE --m 8 --k 5 --K 20 --r 4 --s 5
15+
import argparse
16+
import numpy as np
17+
import sys
18+
19+
from aie.dialects.aie import *
20+
from aie.dialects.aiex import *
21+
from aie.extras.context import mlir_mod_ctx
22+
from aie.helpers.dialects.ext.scf import _for as range_
23+
from aie.helpers.taplib import TensorAccessPattern
24+
25+
26+
# this resembles the buffer A data layout and transformations
27+
def my_passthrough(m, k, K, r, s):
    """Build a shim -> mem tile -> compute tile -> mem tile -> shim passthrough.

    The input path moves an ``m x K`` int32 object into the mem tile, which
    forwards it to the compute tile as ``m x k`` objects using a 4D access
    pattern on the mem tile side and a 3D access pattern on the compute tile
    side.  The core copies every element unchanged to the output path.  The
    resulting MLIR module is printed to stdout.

    Args:
        m: Number of rows of both the small and the large tile.
        k: Number of columns of the small (compute tile) object.
        K: Number of columns of the large (mem tile) object.
        r: Row granularity used by the access patterns; must divide ``m``.
        s: Column granularity used by the access patterns; must divide ``k``.

    Raises:
        AssertionError: If ``K`` is not a multiple of ``k``, ``m`` is not a
            multiple of ``r``, or ``k`` is not a multiple of ``s``.
    """
    # The large tile is split into whole small tiles along K.
    assert K % k == 0

    # m and k must be whole multiples of the r / s granularities.
    assert m % r == 0
    assert k % s == 0

    int32 = np.dtype[np.int32]
    small_tiles_per_large = K // k
    total_elems = m * K

    # Object types: m x k on the compute tile, m x K on the mem tile.
    small_tile_ty = np.ndarray[(m, k), int32]
    large_tile_ty = np.ndarray[(m, K), int32]

    # 4D (size, stride) pattern applied in the mem tile (MM2S).
    # Assumes that the "higher" MemTile size defines the 4D transformation.
    memtile_mm2s_dims = [
        (small_tiles_per_large, m * k),
        (k // s, s),
        (m, k),
        (s, 1),
    ]

    # 3D (size, stride) pattern applied in the compute tile (S2MM);
    # the outer list holds one pattern per consumer.
    comptile_s2mm_dims = [
        [
            (k // s, r * s),
            (m // r, r * k),
            (r * s, 1),
        ]
    ]

    with mlir_mod_ctx() as ctx:

        @device(AIEDevice.npu1_1col)
        def device_body():
            # Tiles used by the design.
            shim_tile = tile(0, 0)
            mem_tile = tile(0, 1)
            compute_tile = tile(0, 2)

            # Input path: shim -> mem tile (large objects) ...
            fifo_shim_to_mem = object_fifo(
                "shim_to_mem", shim_tile, mem_tile, 2, large_tile_ty
            )

            # ... then mem tile -> compute tile (small objects, with dims).
            fifo_mem_to_comp = object_fifo(
                "mem_to_comp",
                mem_tile,
                compute_tile,
                2,
                small_tile_ty,
                memtile_mm2s_dims,
                comptile_s2mm_dims,
            )
            object_fifo_link(fifo_shim_to_mem, fifo_mem_to_comp)

            # Output path: compute tile -> mem tile (small objects) ...
            fifo_comp_to_mem = object_fifo(
                "comp_to_mem", compute_tile, mem_tile, 2, small_tile_ty
            )

            # ... then mem tile -> shim (large objects).
            fifo_mem_to_shim = object_fifo(
                "mem_to_shim", mem_tile, shim_tile, 2, large_tile_ty
            )
            object_fifo_link(fifo_comp_to_mem, fifo_mem_to_shim)

            # The core performs no computation: it copies each small tile
            # element by element from the input fifo to the output fifo.
            @core(compute_tile)
            def core_body():
                for _ in range_(sys.maxsize):
                    for _ in range_(small_tiles_per_large):
                        src = fifo_mem_to_comp.acquire(ObjectFifoPort.Consume, 1)
                        dst = fifo_comp_to_mem.acquire(ObjectFifoPort.Produce, 1)
                        for row in range_(m):
                            for col in range_(k):
                                dst[row, col] = src[row, col]

                        fifo_mem_to_comp.release(ObjectFifoPort.Consume, 1)
                        fifo_comp_to_mem.release(ObjectFifoPort.Produce, 1)

            # The runtime sequence sees every buffer as a flat 1D array.
            flat_ty = np.ndarray[(total_elems,), int32]

            # Host <-> AIE-array data movement.
            @runtime_sequence(flat_ty, flat_ty, flat_ty)
            def sequence(A, B, C):
                npu_dma_memcpy_nd(
                    metadata=fifo_shim_to_mem,
                    bd_id=1,
                    mem=A,
                    sizes=[1, 1, 1, total_elems],
                )

                npu_dma_memcpy_nd(
                    metadata=fifo_mem_to_shim,
                    bd_id=0,
                    mem=C,
                    sizes=[1, 1, 1, total_elems],
                )
                # wait only on output since input will have completed before output
                dma_wait(fifo_mem_to_shim)

        print(ctx.module)
141+
142+
143+
if __name__ == "__main__":
    # Script entry point: parse the five design dimensions and emit the design.
    #
    # The lit RUN lines call this script with named flags
    # (``--m 8 --k 5 --K 20 --r 4 --s 5``), so those flags must be accepted.
    # The original positional form (``aie2.py 8 5 20 4 5``) keeps working.
    dim_names = ("m", "k", "K", "r", "s")

    p = argparse.ArgumentParser()
    p.add_argument("dims", help="m, k, K, r, s", type=int, nargs="*")
    for dim_name in dim_names:
        p.add_argument(
            f"--{dim_name}",
            type=int,
            default=None,
            help=f"dimension {dim_name} (alternative to the positional form)",
        )
    args = p.parse_args()

    flag_values = [getattr(args, dim_name) for dim_name in dim_names]

    if args.dims:
        # Positional form: refuse to guess when both forms are mixed.
        if any(value is not None for value in flag_values):
            p.error("give the dimensions either positionally or as flags, not both")
        dims = list(args.dims)
    else:
        dims = flag_values

    if len(dims) != 5 or any(value is None for value in dims):
        print("ERROR: Must provide all 5 dimensions", file=sys.stderr)
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(-1)

    m, k, K, r, s = dims
    my_passthrough(m=m, k=k, K=K, r=r, s=s)

0 commit comments

Comments
 (0)