Skip to content

Commit 4a55ee7

Browse files
Use ELF for insts in some programming examples (#2578)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 96655a8 commit 4a55ee7

File tree

11 files changed: 71 additions and 121 deletions

programming_examples/basic/memcpy/Makefile

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,10 @@ build/${targetname}.mlir: ${srcdir}/${aie_py_src}
5454
# movement configuration, compute core program memories).
5555
build/final.xclbin: build/${targetname}.mlir build/passThrough.cc.o
5656
mkdir -p ${@D}
57-
cd ${@D} && aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host \
57+
cd ${@D} && aiecc.py --aie-generate-xclbin --no-compile-host \
5858
--xclbin-name=${@F} \
5959
--no-xchesscc --no-xbridge \
60-
--aie-generate-npu --npu-insts-name=insts.bin $(<:%=../%)
60+
--aie-generate-elf --elf-name=insts.elf $(<:%=../%)
6161

6262
# Host code: Program executed on the CPU that will invoke our NPU design
6363
# and verify its outputs.
@@ -73,7 +73,7 @@ else
7373
endif
7474

7575
run: ${targetname}.exe build/final.xclbin
76-
${powershell} ./$< -x build/final.xclbin -i build/insts.bin -k MLIR_AIE -l ${length}
76+
${powershell} ./$< -x build/final.xclbin -i build/insts.elf -k MLIR_AIE -l ${length}
7777

7878
clean:
79-
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe
79+
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe

programming_examples/basic/memcpy/test.cpp

Lines changed: 16 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,10 @@
2121
#include "xrt/xrt_device.h"
2222
#include "xrt/xrt_kernel.h"
2323

24+
#include "xrt/experimental/xrt_elf.h"
25+
#include "xrt/experimental/xrt_ext.h"
26+
#include "xrt/experimental/xrt_module.h"
27+
2428
#include "test_utils.h"
2529

2630
int main(int argc, const char *argv[]) {
@@ -60,19 +64,14 @@ int main(int argc, const char *argv[]) {
6064
return 1;
6165
}
6266

63-
std::vector<uint32_t> instr_v =
64-
test_utils::load_instr_binary(vm["instr"].as<std::string>());
65-
66-
int verbosity = vm["verbosity"].as<int>();
67-
if (verbosity >= 1)
68-
std::cout << "Sequence instr count: " << instr_v.size() << std::endl;
69-
7067
int N = vm["length"].as<int>();
7168
if ((N % 1024)) {
7269
std::cerr << "Length must be a multiple of 1024." << std::endl;
7370
return 1;
7471
}
7572

73+
int verbosity = vm["verbosity"].as<int>();
74+
7675
// Start the XRT test code
7776
// Get a device handle
7877
unsigned int device_index = 0;
@@ -105,6 +104,9 @@ int main(int argc, const char *argv[]) {
105104

106105
device.register_xclbin(xclbin);
107106

107+
xrt::elf elf(vm["instr"].as<std::string>());
108+
xrt::module mod{elf};
109+
108110
// get a hardware context
109111
if (verbosity >= 1)
110112
std::cout << "Getting hardware context." << std::endl;
@@ -113,14 +115,10 @@ int main(int argc, const char *argv[]) {
113115
// get a kernel handle
114116
if (verbosity >= 1)
115117
std::cout << "Getting handle to kernel:" << kernelName << std::endl;
116-
auto kernel = xrt::kernel(context, kernelName);
118+
auto kernel = xrt::ext::kernel(context, mod, kernelName);
117119

118-
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
119-
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
120-
auto bo_inA = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY,
121-
kernel.group_id(3));
122-
auto bo_out = xrt::bo(device, N * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY,
123-
kernel.group_id(4));
120+
xrt::bo bo_inA = xrt::ext::bo{device, N * sizeof(int32_t)};
121+
xrt::bo bo_out = xrt::ext::bo{device, N * sizeof(int32_t)};
124122

125123
if (verbosity >= 1)
126124
std::cout << "Writing data into buffer objects." << std::endl;
@@ -131,22 +129,18 @@ int main(int argc, const char *argv[]) {
131129
srcVecA.push_back(i + 1);
132130
memcpy(bufInA, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t)));
133131

134-
void *bufInstr = bo_instr.map<void *>();
135-
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int));
136-
137-
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
138132
bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);
139133

140134
if (verbosity >= 1)
141135
std::cout << "Running Kernel." << std::endl;
142136
unsigned int opcode = 3;
143137
// Setup run to configure
144-
auto cfg_run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out);
145-
cfg_run.wait();
138+
auto cfg_run = kernel(opcode, 0, 0, bo_inA, bo_out);
139+
cfg_run.wait2();
146140
auto start = std::chrono::high_resolution_clock::now();
147141
// Test run
148-
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out);
149-
run.wait();
142+
auto run = kernel(opcode, 0, 0, bo_inA, bo_out);
143+
run.wait2();
150144
auto stop = std::chrono::high_resolution_clock::now();
151145
const float npu_time =
152146
std::chrono::duration_cast<std::chrono::microseconds>(stop - start)

programming_examples/basic/vector_scalar_add/Makefile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,9 @@ build/aie.mlir: ${srcdir}/${aie_py_src}
3030

3131
build/final.xclbin: build/aie.mlir
3232
mkdir -p ${@D}
33-
cd ${@D} && aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host \
33+
cd ${@D} && aiecc.py --aie-generate-xclbin --aie-generate-elf --no-compile-host \
3434
--no-xchesscc --no-xbridge \
35-
--xclbin-name=${@F} --npu-insts-name=insts.bin ${<F}
35+
--xclbin-name=${@F} --elf-name=insts.elf ${<F}
3636

3737
${targetname}.exe: ${srcdir}/test.cpp
3838
rm -rf _build
@@ -46,7 +46,7 @@ else
4646
endif
4747

4848
run: ${targetname}.exe build/final.xclbin
49-
${powershell} ./$< -x build/final.xclbin -i build/insts.bin -k MLIR_AIE
49+
${powershell} ./$< -x build/final.xclbin -i build/insts.elf -k MLIR_AIE
5050

5151
clean:
5252
rm -rf build _build ${targetname}.exe

programming_examples/basic/vector_scalar_add/test.cpp

Lines changed: 12 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,10 @@
2020
#include "xrt/xrt_device.h"
2121
#include "xrt/xrt_kernel.h"
2222

23+
#include "xrt/experimental/xrt_elf.h"
24+
#include "xrt/experimental/xrt_ext.h"
25+
#include "xrt/experimental/xrt_module.h"
26+
2327
#include "test_utils.h"
2428

2529
int main(int argc, const char *argv[]) {
@@ -40,12 +44,6 @@ int main(int argc, const char *argv[]) {
4044
constexpr int IN_SIZE = 1024;
4145
constexpr int OUT_SIZE = 1024;
4246

43-
// Load instruction sequence
44-
std::vector<uint32_t> instr_v =
45-
test_utils::load_instr_binary(vm["instr"].as<std::string>());
46-
if (verbosity >= 1)
47-
std::cout << "Sequence instr count: " << instr_v.size() << "\n";
48-
4947
// ------------------------------------------------------
5048
// Get device, load the xclbin & kernel and register them
5149
// ------------------------------------------------------
@@ -86,21 +84,21 @@ int main(int argc, const char *argv[]) {
8684
std::cout << "Getting hardware context.\n";
8785
xrt::hw_context context(device, xclbin.get_uuid());
8886

87+
// Load instr ELF
88+
xrt::elf elf(vm["instr"].as<std::string>());
89+
xrt::module mod{elf};
90+
8991
// Get a kernel handle
9092
if (verbosity >= 1)
9193
std::cout << "Getting handle to kernel:" << kernelName << "\n";
92-
auto kernel = xrt::kernel(context, kernelName);
94+
auto kernel = xrt::ext::kernel(context, mod, kernelName);
9395

9496
// ------------------------------------------------------
9597
// Initialize input/ output buffer sizes and sync them
9698
// ------------------------------------------------------
9799

98-
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
99-
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
100-
auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t),
101-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
102-
auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t),
103-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
100+
xrt::bo bo_inA = xrt::ext::bo{device, IN_SIZE * sizeof(int32_t)};
101+
xrt::bo bo_out = xrt::ext::bo{device, OUT_SIZE * sizeof(int32_t)};
104102

105103
if (verbosity >= 1)
106104
std::cout << "Writing data into buffer objects.\n";
@@ -111,16 +109,12 @@ int main(int argc, const char *argv[]) {
111109
srcVecA.push_back(i + 1);
112110
memcpy(bufInA, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t)));
113111

114-
void *bufInstr = bo_instr.map<void *>();
115-
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int));
116-
117-
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
118112
bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);
119113

120114
if (verbosity >= 1)
121115
std::cout << "Running Kernel.\n";
122116
unsigned int opcode = 3;
123-
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out);
117+
auto run = kernel(opcode, 0, 0, bo_inA, bo_out);
124118
run.wait();
125119

126120
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

programming_examples/basic/vector_scalar_add_runlist/Makefile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,9 @@ build/aie.mlir: ${srcdir}/${aie_py_src}
3030

3131
build/final.xclbin: build/aie.mlir
3232
mkdir -p ${@D}
33-
cd ${@D} && aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host \
33+
cd ${@D} && aiecc.py --aie-generate-xclbin --aie-generate-elf --no-compile-host \
3434
--no-xchesscc --no-xbridge \
35-
--xclbin-name=${@F} --npu-insts-name=insts.bin ${<F}
35+
--xclbin-name=${@F} --elf-name=insts.elf ${<F}
3636

3737
${targetname}.exe: ${srcdir}/test.cpp
3838
rm -rf _build
@@ -46,7 +46,7 @@ else
4646
endif
4747

4848
run: ${targetname}.exe build/final.xclbin
49-
${powershell} ./$< -x build/final.xclbin -i build/insts.bin -k MLIR_AIE
49+
${powershell} ./$< -x build/final.xclbin -i build/insts.elf -k MLIR_AIE
5050

5151
clean:
5252
rm -rf build _build ${targetname}.exe

programming_examples/basic/vector_scalar_add_runlist/README.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
# Vector Scalar Addition Runlist:
1212

13+
> NOTE: The `xrt::runlist` feature is not implemented on Phoenix (NPU1)
14+
1315
This design takes the simple Vector Scalar Addition example and shows how you can use the XRT runlist feature to chain multiple commands together.
1416

1517
This design shows an extremely simple single AIE design, which is incrementing every value in an input vector.
@@ -62,4 +64,4 @@ To run the design:
6264

6365
```shell
6466
make run
65-
```
67+
```

programming_examples/basic/vector_scalar_add_runlist/run_makefile.lit

Lines changed: 0 additions & 8 deletions
This file was deleted.

programming_examples/basic/vector_scalar_add_runlist/run_makefile_placed.lit

Lines changed: 0 additions & 11 deletions
This file was deleted.

programming_examples/basic/vector_scalar_add_runlist/test.cpp

Lines changed: 15 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,11 @@
1616
#include <string>
1717
#include <vector>
1818

19+
#include "xrt/experimental/xrt_elf.h"
20+
#include "xrt/experimental/xrt_ext.h"
1921
#include "xrt/experimental/xrt_kernel.h" // for xrt::runlist
22+
#include "xrt/experimental/xrt_module.h"
23+
2024
#include "xrt/xrt_aie.h"
2125
#include "xrt/xrt_bo.h"
2226
#include "xrt/xrt_device.h"
@@ -42,12 +46,6 @@ int main(int argc, const char *argv[]) {
4246
constexpr int IN_SIZE = 1024;
4347
constexpr int OUT_SIZE = 1024;
4448

45-
// Load instruction sequence
46-
std::vector<uint32_t> instr_v =
47-
test_utils::load_instr_binary(vm["instr"].as<std::string>());
48-
if (verbosity >= 1)
49-
std::cout << "Sequence instr count: " << instr_v.size() << "\n";
50-
5149
// ------------------------------------------------------
5250
// Get device, load the xclbin & kernel and register them
5351
// ------------------------------------------------------
@@ -83,6 +81,9 @@ int main(int argc, const char *argv[]) {
8381
<< "\n";
8482
device.register_xclbin(xclbin);
8583

84+
xrt::elf elf(vm["instr"].as<std::string>());
85+
xrt::module mod{elf};
86+
8687
// Get a hardware context
8788
if (verbosity >= 1)
8889
std::cout << "Getting hardware context.\n";
@@ -91,23 +92,15 @@ int main(int argc, const char *argv[]) {
9192
// Get a kernel handle
9293
if (verbosity >= 1)
9394
std::cout << "Getting handle to kernel:" << kernelName << "\n";
94-
auto kernel = xrt::kernel(context, kernelName);
95+
auto kernel = xrt::ext::kernel(context, mod, kernelName);
9596

9697
// ------------------------------------------------------
9798
// Initialize input/ output buffer sizes and sync them
9899
// ------------------------------------------------------
99100

100-
auto bo_instr_0 = xrt::bo(device, instr_v.size() * sizeof(int),
101-
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
102-
auto bo_inA_0 = xrt::bo(device, IN_SIZE * sizeof(int32_t),
103-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
104-
auto bo_out_0 = xrt::bo(device, OUT_SIZE * sizeof(int32_t),
105-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
106-
107-
auto bo_instr_1 = xrt::bo(device, instr_v.size() * sizeof(int),
108-
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
109-
auto bo_out_1 = xrt::bo(device, OUT_SIZE * sizeof(int32_t),
110-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
101+
xrt::bo bo_inA_0 = xrt::ext::bo{device, IN_SIZE * sizeof(int32_t)};
102+
xrt::bo bo_out_0 = xrt::ext::bo{device, OUT_SIZE * sizeof(int32_t)};
103+
xrt::bo bo_out_1 = xrt::ext::bo{device, OUT_SIZE * sizeof(int32_t)};
111104

112105
if (verbosity >= 1)
113106
std::cout << "Writing data into buffer objects.\n";
@@ -121,15 +114,7 @@ int main(int argc, const char *argv[]) {
121114
uint32_t *bufInA_0 = bo_inA_0.map<uint32_t *>();
122115
memcpy(bufInA_0, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t)));
123116

124-
// Getting handles to the instruction sequence BOs and copy data to them
125-
void *bufInstr_0 = bo_instr_0.map<void *>();
126-
void *bufInstr_1 = bo_instr_1.map<void *>();
127-
memcpy(bufInstr_0, instr_v.data(), instr_v.size() * sizeof(int));
128-
memcpy(bufInstr_1, instr_v.data(), instr_v.size() * sizeof(int));
129-
130117
// Synchronizing BOs
131-
bo_instr_0.sync(XCL_BO_SYNC_BO_TO_DEVICE);
132-
bo_instr_1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
133118
bo_inA_0.sync(XCL_BO_SYNC_BO_TO_DEVICE);
134119

135120
unsigned int opcode = 3;
@@ -140,16 +125,16 @@ int main(int argc, const char *argv[]) {
140125
// Creating the first run
141126
xrt::run run0 = xrt::run(kernel);
142127
run0.set_arg(0, opcode);
143-
run0.set_arg(1, bo_instr_0);
144-
run0.set_arg(2, instr_v.size());
128+
run0.set_arg(1, 0);
129+
run0.set_arg(2, 0);
145130
run0.set_arg(3, bo_inA_0);
146131
run0.set_arg(4, bo_out_0);
147132

148133
// Creating the second run
149134
xrt::run run1 = xrt::run(kernel);
150135
run1.set_arg(0, opcode);
151-
run1.set_arg(1, bo_instr_1);
152-
run1.set_arg(2, instr_v.size());
136+
run1.set_arg(1, 0);
137+
run1.set_arg(2, 0);
153138
run1.set_arg(3, bo_out_0);
154139
run1.set_arg(4, bo_out_1);
155140

programming_examples/ml/relu/Makefile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,10 +50,10 @@ build/${targetname}.mlir: ${srcdir}/${aie_py_src}
5050

5151
build/final.xclbin: build/${targetname}.mlir build/${targetname}.cc.o
5252
mkdir -p ${@D}
53-
cd ${@D} && aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host \
53+
cd ${@D} && aiecc.py --aie-generate-xclbin --no-compile-host \
5454
--xclbin-name=${@F} \
5555
--no-xchesscc --no-xbridge \
56-
--aie-generate-npu --npu-insts-name=insts.bin $(<:%=../%)
56+
--aie-generate-elf --elf-name=insts.elf $(<:%=../%)
5757

5858
${targetname}.exe: ${srcdir}/test.cpp
5959
rm -rf _build
@@ -67,7 +67,7 @@ else
6767
endif
6868

6969
run: ${targetname}.exe build/final.xclbin
70-
${powershell} ./$< -x build/final.xclbin -i build/insts.bin -k MLIR_AIE -l ${length}
70+
${powershell} ./$< -x build/final.xclbin -i build/insts.elf -k MLIR_AIE -l ${length}
7171

7272
clean:
7373
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe

0 commit comments

Comments
 (0)