Skip to content

Commit 2262f7d

Browse files
authored
Use npu.dma_wait instead of npu.sync, check run.wait() return value (#1639)
1 parent 3ff9d72 commit 2262f7d

File tree

27 files changed

+92
-48
lines changed

27 files changed

+92
-48
lines changed

test/npu-xrt/add_12_i8_using_2d_dma_op_with_padding/aie.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,8 +63,8 @@ module {
6363
%c61_i64 = arith.constant 61 : i64
6464
%c64_i64 = arith.constant 64 : i64
6565
aiex.npu.dma_memcpy_nd (0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c61_i64, %c56_i64][%c0_i64, %c0_i64, %c56_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<61x56xi8>
66-
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c64_i64, %c64_i64][%c0_i64, %c0_i64, %c64_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64x64xi8>
67-
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
66+
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c64_i64, %c64_i64][%c0_i64, %c0_i64, %c64_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0, issue_token = true} : memref<64x64xi8>
67+
aiex.npu.dma_wait { symbol = @objFifo_out0 }
6868
return
6969
}
7070

test/npu-xrt/add_12_i8_using_2d_dma_op_with_padding/test.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,11 @@ int main(int argc, const char *argv[]) {
9494

9595
unsigned int opcode = 3;
9696
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);
97-
run.wait();
97+
ert_cmd_state r = run.wait();
98+
if (r != ERT_CMD_STATE_COMPLETED) {
99+
std::cout << "Kernel did not complete. Returned status: " << r << "\n";
100+
return 1;
101+
}
98102

99103
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
100104

test/npu-xrt/add_21_i8_using_dma_op_with_padding/aie.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ module {
7070
%c32_i64 = arith.constant 32 : i64
7171
%c64_i64 = arith.constant 64 : i64
7272
aiex.npu.dma_memcpy_nd (0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c32_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi8>
73-
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi8>
74-
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
73+
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0, issue_token = true} : memref<64xi8>
74+
aiex.npu.dma_wait { symbol = @objFifo_out0 }
7575
return
7676
}
7777

test/npu-xrt/add_21_i8_using_dma_op_with_padding/test.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,11 @@ int main(int argc, const char *argv[]) {
9494

9595
unsigned int opcode = 3;
9696
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);
97-
run.wait();
97+
ert_cmd_state r = run.wait();
98+
if (r != ERT_CMD_STATE_COMPLETED) {
99+
std::cout << "Kernel did not complete. Returned status: " << r << "\n";
100+
return 1;
101+
}
98102

99103
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
100104

test/npu-xrt/add_256_using_dma_op_no_double_buffering/aie.mlir

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -98,14 +98,14 @@ module {
9898
}
9999

100100
// the absolutely only thing that's relevant here is (MM2S, 0, 0) and (S2MM, 0, 0)
101-
memref.global "public" @this_just_creates_a_symbol_and_the_type_means_nothing_in : memref<1xi32>
102-
memref.global "public" @this_just_creates_a_symbol_and_the_type_means_nothing_out : memref<1xi32>
103-
aie.shim_dma_allocation @this_just_creates_a_symbol_and_the_type_means_nothing_in(MM2S, 0, 0)
104-
aie.shim_dma_allocation @this_just_creates_a_symbol_and_the_type_means_nothing_out(S2MM, 0, 0)
101+
memref.global "public" @data_in : memref<1xi32>
102+
memref.global "public" @data_out : memref<1xi32>
103+
aie.shim_dma_allocation @data_in(MM2S, 0, 0)
104+
aie.shim_dma_allocation @data_out(S2MM, 0, 0)
105105
func.func @bobsyouruncle(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) {
106-
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 0 : i64, metadata = @this_just_creates_a_symbol_and_the_type_means_nothing_in} : memref<64xi32>
107-
aiex.npu.dma_memcpy_nd (0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 1 : i64, metadata = @this_just_creates_a_symbol_and_the_type_means_nothing_out} : memref<64xi32>
108-
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
106+
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 0 : i64, metadata = @data_in} : memref<64xi32>
107+
aiex.npu.dma_memcpy_nd (0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 1 : i64, metadata = @data_out, issue_token = true} : memref<64xi32>
108+
aiex.npu.dma_wait {symbol = @data_out}
109109
return
110110
}
111111
}

test/npu-xrt/add_256_using_dma_op_no_double_buffering/test.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,11 @@ int main(int argc, const char *argv[]) {
9191

9292
unsigned int opcode = 3;
9393
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);
94-
run.wait();
94+
ert_cmd_state r = run.wait();
95+
if (r != ERT_CMD_STATE_COMPLETED) {
96+
std::cout << "Kernel did not complete. Returned status: " << r << "\n";
97+
return 1;
98+
}
9599

96100
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
97101

test/npu-xrt/add_314_using_dma_op/aie.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,8 @@ module {
6969
%c1_i64 = arith.constant 1 : i64
7070
%c64_i64 = arith.constant 64 : i64
7171
aiex.npu.dma_memcpy_nd (0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32>
72-
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32>
73-
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
72+
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0, issue_token = true} : memref<64xi32>
73+
aiex.npu.dma_wait { symbol = @objFifo_out0 }
7474
return
7575
}
7676

test/npu-xrt/add_314_using_dma_op/test.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,11 @@ int main(int argc, const char *argv[]) {
9191

9292
unsigned int opcode = 3;
9393
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);
94-
run.wait();
94+
ert_cmd_state r = run.wait();
95+
if (r != ERT_CMD_STATE_COMPLETED) {
96+
std::cout << "Kernel did not complete. Returned status: " << r << "\n";
97+
return 1;
98+
}
9599

96100
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
97101

test/npu-xrt/add_378_i32_using_dma_op_with_padding/aie.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ module {
7070
%c52_i64 = arith.constant 52 : i64
7171
%c64_i64 = arith.constant 64 : i64
7272
aiex.npu.dma_memcpy_nd (0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c52_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32>
73-
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32>
74-
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
73+
aiex.npu.dma_memcpy_nd (0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0, issue_token = true} : memref<64xi32>
74+
aiex.npu.dma_wait {symbol = @objFifo_out0}
7575
return
7676
}
7777

test/npu-xrt/add_378_i32_using_dma_op_with_padding/test.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,11 @@ int main(int argc, const char *argv[]) {
9191

9292
unsigned int opcode = 3;
9393
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inB, bo_out);
94-
run.wait();
94+
ert_cmd_state r = run.wait();
95+
if (r != ERT_CMD_STATE_COMPLETED) {
96+
std::cout << "Kernel did not complete. Returned status: " << r << "\n";
97+
return 1;
98+
}
9599

96100
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
97101

0 commit comments

Comments
 (0)