Skip to content

Commit d638ff6

Browse files
committed
Create shim I/O test support for the NPU3 full-ELF flow
Signed-off-by: Hayden Laccabue <[email protected]>
1 parent 078b9a6 commit d638ff6

File tree

7 files changed

+148
-14
lines changed

7 files changed

+148
-14
lines changed

src/shim/buffer.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ uint8_t
2121
use_to_fw_debug_type(uint8_t use)
2222
{
2323
switch (use) {
24-
case XRT_BO_USE_DEBUG:
25-
return AMDXDNA_FW_BUF_DEBUG;
24+
case XRT_BO_USE_DEBUG: // USE_DEBUG and USE_UC_DEBUG should return same type, 2 different cases for different devices
2625
case XRT_BO_USE_UC_DEBUG:
2726
return AMDXDNA_FW_BUF_DEBUG;
2827
case XRT_BO_USE_DTRACE:

test/shim_test/dev_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ enum kernel_type {
1313
KERNEL_TYPE_DPU_SEQ = 0,
1414
KERNEL_TYPE_TXN,
1515
KERNEL_TYPE_TXN_PREEMPT,
16+
KERNEL_TYPE_TXN_FULL_ELF,
1617
KERNEL_TYPE_TXN_FULL_ELF_PREEMPT,
1718
};
1819

test/shim_test/hwctx.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ class hw_ctx {
3434
{
3535
xrt::xclbin xclbin;
3636
xrt::elf elf;
37-
auto is_full_elf = (get_kernel_type(dev, xclbin_name) == KERNEL_TYPE_TXN_FULL_ELF_PREEMPT);
37+
auto kernel_type = get_kernel_type(dev, xclbin_name);
38+
auto is_full_elf = (kernel_type == KERNEL_TYPE_TXN_FULL_ELF_PREEMPT ||
39+
kernel_type == KERNEL_TYPE_TXN_FULL_ELF);
3840
auto path = get_xclbin_path(dev, xclbin_name);
3941

4042
try {

test/shim_test/io.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,47 @@ elf_io_test_bo_set(device* dev, const std::string& xclbin_name) :
384384
}
385385
}
386386

387+
// Builds the BO set used by the full-ELF I/O test.
//
// Loads the ELF located at the path returned by get_xclbin_path(), resolves
// the control-code id of the kernel named by get_kernel_name(), and then
// populates one entry of m_bo_array per io_test_bo_type.
//
// dev         - device the BOs are created on (forwarded to the base class).
// xclbin_name - name used to look up both the ELF path and the kernel name.
elf_full_io_test_bo_set::
elf_full_io_test_bo_set(device* dev, const std::string& xclbin_name)
  : io_test_bo_set_base(dev, xclbin_name)
{
  auto path = get_xclbin_path(dev, xclbin_name.c_str());
  m_elf = xrt::elf(path);
  xrt::module mod{m_elf};
  auto name = get_kernel_name(dev, xclbin_name.c_str());

  // If the lookup throws, fall back to the "no control code" sentinel
  // instead of failing construction.
  try {
    m_kernel_index = module_int::get_ctrlcode_id(mod, name);
  } catch (const std::exception&) {
    m_kernel_index = module_int::no_ctrl_code_id;
  }

  for (int idx = 0; idx < IO_TEST_BO_MAX_TYPES; ++idx) {
    auto& slot = m_bo_array[idx];

    switch (static_cast<io_test_bo_type>(idx)) {
    case IO_TEST_BO_CMD:
      alloc_cmd_bo(slot, m_dev);
      break;
    case IO_TEST_BO_INSTRUCTION:
      // Control code comes out of the ELF's ctrltext section.
      create_ctrl_bo_from_elf(slot, patcher::buf_type::ctrltext);
      break;
    case IO_TEST_BO_INPUT:
      create_data_bo_from_file(slot, "ifm.bin", m_FLAG_OPT);
      break;
    case IO_TEST_BO_PARAMETERS:
      create_data_bo_from_file(slot, "wts.bin", m_FLAG_OPT);
      break;
    case IO_TEST_BO_OUTPUT:
      create_data_bo_from_file(slot, "ofm.bin", m_FLAG_NO_FILL|m_FLAG_OPT);
      break;
    default:
      // Remaining BO types are not used by this flow.
      break;
    }
  }
}
427+
387428
elf_preempt_io_test_bo_set::
388429
elf_preempt_io_test_bo_set(device* dev, const std::string& xclbin_name)
389430
: io_test_bo_set_base(dev, xclbin_name)
@@ -605,6 +646,30 @@ init_cmd(hw_ctx& hwctx, bool dump)
605646
patcher::buf_type::ctrltext, m_elf, module_int::no_ctrl_code_id);
606647
}
607648

649+
void
650+
elf_full_io_test_bo_set::
651+
init_cmd(hw_ctx& hwctx, bool dump)
652+
{
653+
exec_buf ebuf(*m_bo_array[IO_TEST_BO_CMD].tbo.get(), ERT_START_DPU);
654+
655+
xrt_core::cuidx_type cu_idx{0};
656+
ebuf.set_cu_idx(cu_idx);
657+
658+
if (m_bo_array[IO_TEST_BO_INPUT].tbo.get()) {
659+
ebuf.add_arg_64(3);
660+
ebuf.add_arg_bo(*m_bo_array[IO_TEST_BO_INPUT].tbo.get(), "0");
661+
ebuf.add_arg_bo(*m_bo_array[IO_TEST_BO_PARAMETERS].tbo.get(), "1");
662+
ebuf.add_arg_bo(*m_bo_array[IO_TEST_BO_OUTPUT].tbo.get(), "2");
663+
}
664+
665+
if (dump)
666+
ebuf.dump();
667+
668+
ebuf.add_ctrl_bo(*m_bo_array[IO_TEST_BO_INSTRUCTION].tbo.get());
669+
ebuf.patch_ctrl_code(*m_bo_array[IO_TEST_BO_INSTRUCTION].tbo.get(),
670+
patcher::buf_type::ctrltext, m_elf, m_kernel_index);
671+
}
672+
608673
void
609674
elf_preempt_io_test_bo_set::
610675
init_cmd(hw_ctx& hwctx, bool dump)
@@ -883,6 +948,32 @@ get_preemption_checkpoints()
883948
return 0;
884949
}
885950

951+
void
952+
io_test_bo_set_base::
953+
cache_cmd_header()
954+
{
955+
auto cbo = m_bo_array[IO_TEST_BO_CMD].tbo.get();
956+
if (!cbo)
957+
return;
958+
auto pkt = reinterpret_cast<ert_packet *>(cbo->map());
959+
if (!m_cached_cmd_header)
960+
m_cached_cmd_header = pkt->header;
961+
}
962+
963+
void
964+
io_test_bo_set_base::
965+
restore_cmd_header()
966+
{
967+
auto cbo = m_bo_array[IO_TEST_BO_CMD].tbo.get();
968+
if (!cbo || !m_cached_cmd_header)
969+
return;
970+
971+
auto pkt = reinterpret_cast<ert_packet *>(cbo->map());
972+
pkt->header = m_cached_cmd_header;
973+
pkt->state = ERT_CMD_STATE_NEW;
974+
std::atomic_thread_fence(std::memory_order_seq_cst);
975+
}
976+
886977
unsigned long
887978
elf_preempt_io_test_bo_set::
888979
get_preemption_checkpoints()

test/shim_test/io.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ class io_test_bo_set_base
7070
virtual void
7171
verify_result();
7272

73+
void
74+
cache_cmd_header();
75+
76+
void
77+
restore_cmd_header();
78+
7379
static const char *
7480
bo_type2name(int type);
7581

@@ -90,6 +96,7 @@ class io_test_bo_set_base
9096
const int m_FLAG_OPT = 1 << 1;
9197
const int m_FLAG_NO_FILL = 1 << 2;
9298
const int m_FLAG_DEV_BUF = 1 << 3;
99+
uint32_t m_cached_cmd_header = 0;
93100

94101
void
95102
create_data_bo_from_file(io_test_bo& ibo, const std::string filename, int flags);
@@ -124,6 +131,15 @@ class elf_io_test_bo_set : public io_test_bo_set_base
124131
init_cmd(hw_ctx& hwctx, bool dump) override;
125132
};
126133

134+
class elf_full_io_test_bo_set : public io_test_bo_set_base
135+
{
136+
public:
137+
elf_full_io_test_bo_set(device *dev, const std::string& xclbin_name);
138+
139+
void
140+
init_cmd(hw_ctx& hwctx, bool dump) override;
141+
};
142+
127143
class elf_preempt_io_test_bo_set : public io_test_bo_set_base
128144
{
129145
public:

test/shim_test/io_test.cpp

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ alloc_and_init_bo_set(device* dev, const char *xclbin)
4343
base = std::make_unique<elf_io_test_bo_set>(dev, std::string(xclbin));
4444
break;
4545
case KERNEL_TYPE_TXN_PREEMPT:
46+
case KERNEL_TYPE_TXN_FULL_ELF:
47+
base = std::make_unique<elf_full_io_test_bo_set>(dev, xclbin ? std::string(xclbin) : get_xclbin_name(dev));
48+
break;
4649
case KERNEL_TYPE_TXN_FULL_ELF_PREEMPT:
4750
base = std::make_unique<elf_preempt_io_test_bo_set>(dev, std::string(xclbin));
4851
break;
@@ -113,16 +116,25 @@ void
113116
io_test_cmd_submit_and_wait_latency(
114117
hwqueue_handle *hwq,
115118
int total_cmd_submission,
116-
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos
119+
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos,
120+
std::vector< std::unique_ptr<io_test_bo_set_base> >* bo_set_ptr = nullptr
117121
)
118122
{
119123
int completed = 0;
120-
int wait_idx = 0;
121124

122125
while (completed < total_cmd_submission) {
123-
for (auto& cmd : cmdlist_bos) {
126+
for (size_t i = 0; i < cmdlist_bos.size(); i++) {
127+
auto& cmd = cmdlist_bos[i];
128+
// For UMQ, restore header before resubmission
129+
if (completed > 0 && bo_set_ptr && i < bo_set_ptr->size()) {
130+
(*bo_set_ptr)[i]->restore_cmd_header();
131+
}
132+
124133
hwq->submit_command(std::get<0>(cmd).get()->get());
125134
io_test_cmd_wait(hwq, std::get<0>(cmd));
135+
136+
if (bo_set_ptr && i < bo_set_ptr->size())
137+
std::atomic_thread_fence(std::memory_order_acquire);
126138
auto state = std::get<1>(cmd)->state;
127139
if (state != ERT_CMD_STATE_COMPLETED) {
128140
std::string errmsg = "Command ";
@@ -135,7 +147,8 @@ io_test_cmd_submit_and_wait_latency(
135147
completed++;
136148
if (completed >= total_cmd_submission)
137149
break;
138-
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
150+
if (!bo_set_ptr)
151+
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
139152
}
140153
}
141154
}
@@ -144,15 +157,17 @@ void
144157
io_test_cmd_submit_and_wait_thruput(
145158
hwqueue_handle *hwq,
146159
int total_cmd_submission,
147-
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos
160+
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos,
161+
std::vector< std::unique_ptr<io_test_bo_set_base> >* bo_set_ptr = nullptr
148162
)
149163
{
150164
int issued = 0;
151165
int completed = 0;
152166
int wait_idx = 0;
153167

154168
for (auto& cmd : cmdlist_bos) {
155-
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
169+
if (!bo_set_ptr)
170+
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
156171
hwq->submit_command(std::get<0>(cmd).get()->get());
157172
issued++;
158173
if (issued >= total_cmd_submission)
@@ -167,11 +182,15 @@ io_test_cmd_submit_and_wait_thruput(
167182
completed++;
168183

169184
if (issued < total_cmd_submission) {
170-
std::get<1>(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW;
185+
// For UMQ, restore header; for KMQ, just reset state
186+
if (bo_set_ptr && wait_idx < bo_set_ptr->size())
187+
(*bo_set_ptr)[wait_idx]->restore_cmd_header();
188+
else
189+
std::get<1>(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW;
190+
171191
hwq->submit_command(std::get<0>(cmdlist_bos[wait_idx]).get()->get());
172192
issued++;
173193
}
174-
175194
if (++wait_idx == cmdlist_bos.size())
176195
wait_idx = 0;
177196
}
@@ -255,6 +274,7 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist,
255274
// Initialize cmd before submission
256275
for (auto& boset : bo_set) {
257276
boset->init_cmd(hwctx, io_test_parameters.debug);
277+
boset->cache_cmd_header();
258278
boset->sync_before_run();
259279
}
260280

@@ -292,10 +312,15 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist,
292312

293313
// Submit commands and wait for results
294314
auto start = clk::now();
315+
auto device_id = device_query<query::pcie_device>(dev);
316+
bool is_umq = (device_id == npu3_device_id || device_id == npu3_device_id1);
317+
295318
if (io_test_parameters.perf == IO_TEST_THRUPUT_PERF)
296-
io_test_cmd_submit_and_wait_thruput(hwq, total_hwq_submit, cmdlist_bos);
319+
io_test_cmd_submit_and_wait_thruput(hwq, total_hwq_submit, cmdlist_bos,
320+
is_umq ? &bo_set : nullptr);
297321
else
298-
io_test_cmd_submit_and_wait_latency(hwq, total_hwq_submit, cmdlist_bos);
322+
io_test_cmd_submit_and_wait_latency(hwq, total_hwq_submit, cmdlist_bos,
323+
is_umq ? &bo_set : nullptr);
299324
auto end = clk::now();
300325

301326
// Verify preemption counters

test/shim_test/shim_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ std::vector<test_case> test_list {
779779
TEST_POSITIVE, dev_filter_is_npu4, TEST_instr_invalid_addr_io, {}
780780
},
781781
test_case{ "measure no-op kernel latency", {},
782-
TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NOOP_RUN, IO_TEST_IOCTL_WAIT, 32000 }
782+
TEST_POSITIVE, dev_filter_is_xdna, TEST_io_latency, { IO_TEST_NOOP_RUN, IO_TEST_IOCTL_WAIT, 32000 }
783783
},
784784
test_case{ "measure real kernel latency", {},
785785
TEST_POSITIVE, dev_filter_is_aie2, TEST_io_latency, { IO_TEST_NORMAL_RUN, IO_TEST_IOCTL_WAIT, 32000 }

0 commit comments

Comments
 (0)