Skip to content

Commit 6a3b9a5

Browse files
authored
Enhanced conv2d by making width, height and channels configurable via Makefile. (#2022)
1 parent 5af32e7 commit 6a3b9a5

File tree

5 files changed

+192
-78
lines changed

5 files changed

+192
-78
lines changed

programming_examples/ml/conv2d/Makefile

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,21 @@ include ${srcdir}/../../makefile-common
1111

1212
mlirFileName = aie
1313

14+
# Modify these params to configure design
15+
width = 32
16+
height = 32
17+
in_channels = 64
18+
out_channels = 64
19+
vectorized ?= true
1420
trace_size = 16384
1521

1622
VPATH := ${srcdir}/../../../aie_kernels/aie2
1723

18-
aie_py_src=conv2d.py
19-
aie_py_trace_src=conv2d_alt.py
20-
use_alt?=0
24+
aie_py_src = conv2d.py
25+
aie_py_trace_src = conv2d_alt.py
26+
use_alt ?= 0
27+
28+
device = npu
2129

2230
ifeq (${use_alt}, 1)
2331
aie_py_src=conv2d_alt.py
@@ -27,15 +35,29 @@ all: build/conv2dk1_i8.o build/final.xclbin
2735

2836
build/conv2dk1_i8.o: conv2dk1_i8.cc
2937
mkdir -p ${@D}
38+
ifeq ($(device),npu)
39+
ifeq ($(vectorized), true)
3040
cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang ${PEANOWRAP2_FLAGS} -DINT8_ACT -c $< -o ${@F}
41+
else
42+
cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang ${PEANOWRAP2_FLAGS} -DSCALAR -DINT8_ACT -c $< -o ${@F}
43+
endif
44+
else ifeq ($(device),npu2)
45+
ifeq ($(vectorized), true)
46+
cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang ${PEANOWRAP2P_FLAGS} -DINT8_ACT -c $< -o ${@F}
47+
else
48+
cd ${@D} && ${PEANO_INSTALL_DIR}/bin/clang ${PEANOWRAP2P_FLAGS} -DSCALAR -DINT8_ACT -c $< -o ${@F}
49+
endif
50+
else
51+
echo "Device type not supported"
52+
endif
3153

3254
build/${mlirFileName}.mlir: ${srcdir}/${aie_py_src}
3355
mkdir -p ${@D}
34-
python3 $< > $@
56+
python3 $< ${device} ${width} ${height} ${in_channels} ${out_channels} 0 > $@
3557

3658
build/${mlirFileName}_trace.mlir: ${srcdir}/${aie_py_trace_src}
3759
mkdir -p ${@D}
38-
python3 $< ${trace_size} > $@
60+
python3 $< ${device} ${width} ${height} ${in_channels} ${out_channels} ${trace_size} > $@
3961

4062
build/final.xclbin: build/${mlirFileName}.mlir build/conv2dk1_i8.o
4163
mkdir -p ${@D}
@@ -50,13 +72,13 @@ build/final_trace.xclbin: build/${mlirFileName}_trace.mlir build/conv2dk1_i8.o
5072
--xclbin-name=${@F} --npu-insts-name=insts_trace.txt $(<:%=../%)
5173

5274
run_py: build/final.xclbin
53-
${powershell} python3 ${srcdir}/test.py -x build/final.xclbin -i build/insts.txt -k MLIR_AIE
75+
${powershell} python3 ${srcdir}/test.py -x build/final.xclbin -i build/insts.txt -k MLIR_AIE -wd ${width} -ht ${height} -ic ${in_channels} -oc ${out_channels}
5476

5577
trace_py: build/final_trace.xclbin
56-
${powershell} python3 ${srcdir}/test.py -x build/final_trace.xclbin -i build/insts_trace.txt -k MLIR_AIE -t ${trace_size}
78+
${powershell} python3 ${srcdir}/test.py -x build/final_trace.xclbin -i build/insts_trace.txt -k MLIR_AIE -wd ${width} -ht ${height} -ic ${in_channels} -oc ${out_channels} -t ${trace_size}
5779
${srcdir}/../../utils/parse_trace.py --filename log/trace_conv2d.txt --mlir build/aie_trace.mlir --colshift 1 > log/trace_conv2d.json
5880

5981
clean:
6082
rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \
6183
chess* *.o insts.txt \
62-
*.log aie_partition.json *.bin BOOT.BIN _x test.exe
84+
*.log aie_partition.json *.bin BOOT.BIN _x test.exe

programming_examples/ml/conv2d/README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,15 @@ To compile the design:
8181
make
8282
```
8383

84-
To compile the design:
84+
To compile the design using the lower-level IRON version (currently needed for trace):
8585
```shell
8686
env use_alt=1 make
8787
```
8888

8989
To run the design:
9090
```shell
9191
make run_py
92-
```
92+
```
93+
94+
## Configure design
95+
To configure the parameters of the convolution, such as data width, height, and the number of input and output channels, edit the variables at the top of the `Makefile`. The scalar or vectorized version of the kernel can likewise be selected in the `Makefile` through the `vectorized` variable: `true` builds the vectorized kernel, and any other value builds the scalar one.

programming_examples/ml/conv2d/conv2d.py

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,24 @@
1212
from aie.iron.device import NPU1Col1
1313
from aie.iron.controlflow import range_
1414

15-
width = 32
16-
height = 32
17-
in_channels = 64
18-
out_channels = 64
1915

20-
if len(sys.argv) == 3:
21-
width = int(sys.argv[1])
22-
height = int(sys.argv[2])
16+
def conv2dk1(
17+
dev, width: int, height: int, in_channels: int, out_channels: int, trace_size: int
18+
):
2319

20+
actIn = width * in_channels # 32*64 = 2048
21+
bufIn = actIn * 2 # double buffer
2422

25-
actIn = width * in_channels # 32*64 = 2048
26-
bufIn = actIn * 2 # double buffer
23+
weights = in_channels * out_channels
2724

28-
weights = in_channels * out_channels
25+
actOut = width * out_channels # 32*64 = 2048
26+
bufOut = actOut * 2 # double buffer
2927

30-
actOut = width * out_channels # 32*64 = 2048
31-
bufOut = actOut * 2 # double buffer
28+
tensorInSize = width * height * in_channels
29+
tensorOutSize = width * height * out_channels
3230

33-
tensorSize = width * height * in_channels
31+
N_in_bytes = tensorOutSize # Number of bytes of output data (1 byte/elem)
3432

35-
N_in_bytes = tensorSize # Number of bytes of output data (1 byte/elem)
36-
37-
38-
def conv2dk1(trace_size: int):
3933
# Type definitions
4034
actIn_ty = np.ndarray[(actIn,), np.dtype[np.int8]]
4135
bufIn_ty = np.ndarray[(bufIn,), np.dtype[np.int8]]
@@ -44,7 +38,8 @@ def conv2dk1(trace_size: int):
4438

4539
out_ty = np.ndarray[(actOut,), np.dtype[np.int8]]
4640
bufOut_ty = np.ndarray[(bufOut,), np.dtype[np.int8]]
47-
tensor_ty = np.ndarray[(tensorSize,), np.dtype[np.int8]]
41+
tensorIn_ty = np.ndarray[(tensorInSize,), np.dtype[np.int8]]
42+
tensorOut_ty = np.ndarray[(tensorOutSize,), np.dtype[np.int8]]
4843

4944
# AIE Core Function declarations
5045
conv2dk1_i8_kernel = Kernel(
@@ -82,10 +77,10 @@ def conv2dk1(trace_size: int):
8277

8378
# Task for the core to perform
8479
def core_fn(of_wts, of_act, of_out, my_rtp, conv2dk1_i8):
85-
y_dim = 32
86-
x_dim = 32
87-
ci = 64
88-
co = 64
80+
y_dim = height
81+
x_dim = width
82+
ci = in_channels
83+
co = out_channels
8984

9085
elemWts = of_wts.acquire(1)
9186
scale = my_rtp[0]
@@ -114,7 +109,7 @@ def core_fn(of_wts, of_act, of_out, my_rtp, conv2dk1_i8):
114109

115110
# Runtime operations to move data to/from the AIE-array
116111
rt = Runtime()
117-
with rt.sequence(tensor_ty, weights_ty, tensor_ty) as (I, W, O):
112+
with rt.sequence(tensorIn_ty, weights_ty, tensorOut_ty) as (I, W, O):
118113
# Initialize the runtime parameter values
119114
def set_rtps(my_rtp):
120115
my_rtp[0] = 10
@@ -130,9 +125,39 @@ def set_rtps(my_rtp):
130125
rt.drain(of_outOFL2L3.cons(), O, wait=True)
131126

132127
# Place components (assign them resources on the device) and generate an MLIR module
133-
return Program(NPU1Col1(), rt).resolve_program(SequentialPlacer())
134-
135-
136-
if __name__ == "__main__":
137-
trace_size = 0 if (len(sys.argv) != 2) else int(sys.argv[1])
138-
print(conv2dk1(trace_size=trace_size))
128+
return Program(dev, rt).resolve_program(SequentialPlacer())
129+
130+
131+
try:
132+
device_name = str(sys.argv[1])
133+
if device_name == "npu":
134+
dev = NPU1Col1()
135+
elif device_name == "npu2":
136+
dev = NPU2()
137+
else:
138+
raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
139+
width = int(sys.argv[2])
140+
if width % 8 != 0 or width < 8:
141+
print("Width size must be a multiple of 8 and greater than or equal to 8")
142+
raise ValueError
143+
height = int(sys.argv[3])
144+
if height < 2:
145+
print("Height needs to be > 1 at the moment (BUG)")
146+
raise ValueError
147+
in_channels = int(sys.argv[4])
148+
if in_channels % 8 != 0 or in_channels < 8:
149+
print(
150+
"Input channels size must be a multiple of 8 and greater than or equal to 8"
151+
)
152+
raise ValueError
153+
out_channels = int(sys.argv[5])
154+
if out_channels % 8 != 0 or out_channels < 8:
155+
print(
156+
"Output channel size must be a multiple of 8 and greater than or equal to 8"
157+
)
158+
raise ValueError
159+
trace_size = 0 if (len(sys.argv) != 7) else int(sys.argv[6])
160+
except ValueError:
161+
print("Argument has inappropriate value")
162+
module = conv2dk1(dev, width, height, in_channels, out_channels, trace_size)
163+
print(module)

programming_examples/ml/conv2d/conv2d_alt.py

Lines changed: 55 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,31 +13,24 @@
1313
from aie.helpers.dialects.ext.scf import _for as range_
1414
import aie.utils.trace as trace_utils
1515

16-
width = 32
17-
height = 32
18-
in_channels = 64
19-
out_channels = 64
2016

21-
if len(sys.argv) == 3:
22-
width = int(sys.argv[1])
23-
height = int(sys.argv[2])
24-
25-
26-
actIn = width * in_channels # 32*64 = 2048
27-
bufIn = actIn * 2 # double buffer
17+
def conv2dk1(
18+
dev, width: int, height: int, in_channels: int, out_channels: int, trace_size: int
19+
):
20+
with mlir_mod_ctx() as ctx:
2821

29-
weights = in_channels * out_channels
22+
actIn = width * in_channels # 32*64 = 2048
23+
bufIn = actIn * 2 # double buffer
3024

31-
actOut = width * out_channels # 32*64 = 2048
32-
bufOut = actOut * 2 # double buffer
25+
weights = in_channels * out_channels
3326

34-
tensorSize = width * height * in_channels
27+
actOut = width * out_channels # 32*64 = 2048
28+
bufOut = actOut * 2 # double buffer
3529

36-
N_in_bytes = tensorSize # Number of bytes of output data (1 byte/elem)
30+
tensorInSize = width * height * in_channels
31+
tensorOutSize = width * height * out_channels
3732

38-
39-
def conv2dk1(trace_size: int):
40-
with mlir_mod_ctx() as ctx:
33+
N_in_bytes = tensorOutSize # Number of bytes of output data (1 byte/elem)
4134

4235
@device(AIEDevice.npu1_1col)
4336
def device_body():
@@ -49,7 +42,8 @@ def device_body():
4942

5043
out_ty = np.ndarray[(actOut,), np.dtype[np.int8]]
5144
bufOut_ty = np.ndarray[(bufOut,), np.dtype[np.int8]]
52-
tensor_ty = np.ndarray[(tensorSize,), np.dtype[np.int8]]
45+
tensorIn_ty = np.ndarray[(tensorInSize,), np.dtype[np.int8]]
46+
tensorOut_ty = np.ndarray[(tensorOutSize,), np.dtype[np.int8]]
5347

5448
# AIE Core Function declarations
5549
conv2dk1_i8 = external_func(
@@ -105,10 +99,10 @@ def device_body():
10599
# Compute tile 2
106100
@core(ComputeTile2, "conv2dk1_i8.o")
107101
def core_body():
108-
y_dim = 32
109-
x_dim = 32
110-
ci = 64
111-
co = 64
102+
y_dim = height
103+
x_dim = width
104+
ci = in_channels
105+
co = out_channels
112106

113107
for _ in range_(0xFFFFFFFF):
114108
elemWts = of_inOF_wts_0_L3L2.acquire(ObjectFifoPort.Consume, 1)
@@ -126,7 +120,7 @@ def core_body():
126120
of_inOF_wts_0_L3L2.release(ObjectFifoPort.Consume, 1)
127121

128122
# To/from AIE-array data movement
129-
@runtime_sequence(tensor_ty, weights_ty, tensor_ty)
123+
@runtime_sequence(tensorIn_ty, weights_ty, tensorOut_ty)
130124
def sequence(I, W, O):
131125

132126
if trace_size > 0:
@@ -139,7 +133,7 @@ def sequence(I, W, O):
139133
in_act_task = shim_dma_single_bd_task(
140134
of_inOF_act_L3L2,
141135
I,
142-
sizes=[1, 1, 1, tensorSize],
136+
sizes=[1, 1, 1, tensorInSize],
143137
issue_token=True,
144138
)
145139
in_wts_task = shim_dma_single_bd_task(
@@ -151,7 +145,7 @@ def sequence(I, W, O):
151145
out_task = shim_dma_single_bd_task(
152146
of_outOFL2L3,
153147
O,
154-
sizes=[1, 1, 1, tensorSize],
148+
sizes=[1, 1, 1, tensorOutSize],
155149
issue_token=True,
156150
)
157151

@@ -163,5 +157,37 @@ def sequence(I, W, O):
163157

164158

165159
if __name__ == "__main__":
166-
trace_size = 0 if (len(sys.argv) != 2) else int(sys.argv[1])
167-
conv2dk1(trace_size=trace_size)
160+
try:
161+
device_name = str(sys.argv[1])
162+
if device_name == "npu":
163+
# dev = NPU1Col1()
164+
dev = 0 # placeholders
165+
elif device_name == "npu2":
166+
# dev = NPU2()
167+
dev = 1 # placeholders
168+
else:
169+
raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
170+
width = int(sys.argv[2])
171+
if width % 8 != 0 or width < 8:
172+
print("Width size must be a multiple of 8 and greater than or equal to 8")
173+
raise ValueError
174+
height = int(sys.argv[3])
175+
if height % 8 != 0 or height < 8:
176+
print("Height size must be a multiple of 8 and greater than or equal to 8")
177+
raise ValueError
178+
in_channels = int(sys.argv[4])
179+
if in_channels % 8 != 0 or in_channels < 8:
180+
print(
181+
"Input channels size must be a multiple of 8 and greater than or equal to 8"
182+
)
183+
raise ValueError
184+
out_channels = int(sys.argv[5])
185+
if out_channels % 8 != 0 or out_channels < 8:
186+
print(
187+
"Output channel size must be a multiple of 8 and greater than or equal to 8"
188+
)
189+
raise ValueError
190+
trace_size = 0 if (len(sys.argv) != 7) else int(sys.argv[6])
191+
except ValueError:
192+
print("Argument has inappropriate value")
193+
conv2dk1(dev, width, height, in_channels, out_channels, trace_size)

0 commit comments

Comments
 (0)