Skip to content

Commit e0c0e71

Browse files
committed
Creating shim io test support for npu3 full elf flow
Signed-off-by: Hayden Laccabue <[email protected]>
1 parent 5fc2b89 commit e0c0e71

File tree

6 files changed

+152
-12
lines changed

6 files changed

+152
-12
lines changed

src/shim/buffer.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ uint8_t
2121
use_to_fw_debug_type(uint8_t use)
2222
{
2323
switch (use) {
24-
case XRT_BO_USE_DEBUG:
25-
return AMDXDNA_FW_BUF_DEBUG;
24+
case XRT_BO_USE_DEBUG: // USE_DEBUG and USE_UC_DEBUG should return same type, 2 different cases for different devices
2625
case XRT_BO_USE_UC_DEBUG:
2726
return AMDXDNA_FW_BUF_DEBUG;
2827
case XRT_BO_USE_DTRACE:

test/shim_test/dev_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ enum kernel_type {
1313
KERNEL_TYPE_DPU_SEQ = 0,
1414
KERNEL_TYPE_TXN,
1515
KERNEL_TYPE_TXN_PREEMPT,
16+
KERNEL_TYPE_TXN_FULL_ELF,
1617
KERNEL_TYPE_TXN_FULL_ELF_PREEMPT,
1718
};
1819

test/shim_test/hwctx.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ class hw_ctx {
3434
{
3535
xrt::xclbin xclbin;
3636
xrt::elf elf;
37-
auto is_full_elf = (get_kernel_type(dev, xclbin_name) == KERNEL_TYPE_TXN_FULL_ELF_PREEMPT);
37+
auto kernel_type = get_kernel_type(dev, xclbin_name);
38+
auto is_full_elf = (kernel_type == KERNEL_TYPE_TXN_FULL_ELF_PREEMPT ||
39+
kernel_type == KERNEL_TYPE_TXN_FULL_ELF);
3840
auto path = get_xclbin_path(dev, xclbin_name);
3941

4042
try {

test/shim_test/io.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,47 @@ elf_io_test_bo_set(device* dev, const std::string& xclbin_name) :
384384
}
385385
}
386386

387+
/**
 * Build the BO set used by the full-ELF test flow.
 *
 * Opens the file resolved from @p xclbin_name as an ELF, records the
 * control-code id of the kernel named for that entry, then fills every
 * slot of m_bo_array that this flow uses.
 *
 * @param dev          device passed to the path/kernel-name lookups and to
 *                     the base class
 * @param xclbin_name  name used to resolve both the ELF path and the kernel
 *                     name
 */
elf_full_io_test_bo_set::
elf_full_io_test_bo_set(device* dev, const std::string& xclbin_name)
  : io_test_bo_set_base(dev, xclbin_name)
{
  // The path returned for this name is opened as an ELF in this flow.
  auto full_elf_path = get_xclbin_path(dev, xclbin_name.c_str());
  m_elf = xrt::elf(full_elf_path);

  auto elf_module = xrt::module{m_elf};
  auto kname = get_kernel_name(dev, xclbin_name.c_str());

  // Best effort: when the id lookup throws, fall back to the
  // "no control code id" sentinel instead of failing construction.
  try {
    m_kernel_index = module_int::get_ctrlcode_id(elf_module, kname);
  } catch (const std::exception&) {
    m_kernel_index = module_int::no_ctrl_code_id;
  }

  // Walk every BO type; types not listed below are left untouched.
  for (int idx = 0; idx < IO_TEST_BO_MAX_TYPES; ++idx) {
    auto& slot = m_bo_array[idx];

    switch (static_cast<io_test_bo_type>(idx)) {
    case IO_TEST_BO_CMD:
      alloc_cmd_bo(slot, m_dev);
      break;
    case IO_TEST_BO_INSTRUCTION:
      create_ctrl_bo_from_elf(slot, patcher::buf_type::ctrltext);
      break;
    case IO_TEST_BO_INPUT:
      create_data_bo_from_file(slot, "ifm.bin", m_FLAG_OPT);
      break;
    case IO_TEST_BO_PARAMETERS:
      create_data_bo_from_file(slot, "wts.bin", m_FLAG_OPT);
      break;
    case IO_TEST_BO_OUTPUT:
      create_data_bo_from_file(slot, "ofm.bin", m_FLAG_NO_FILL|m_FLAG_OPT);
      break;
    default:
      break;
    }
  }
}
427+
387428
elf_preempt_io_test_bo_set::
388429
elf_preempt_io_test_bo_set(device* dev, const std::string& xclbin_name)
389430
: io_test_bo_set_base(dev, xclbin_name)
@@ -605,6 +646,30 @@ init_cmd(hw_ctx& hwctx, bool dump)
605646
patcher::buf_type::ctrltext, m_elf, module_int::no_ctrl_code_id);
606647
}
607648

649+
void
650+
elf_full_io_test_bo_set::
651+
init_cmd(hw_ctx& hwctx, bool dump)
652+
{
653+
exec_buf ebuf(*m_bo_array[IO_TEST_BO_CMD].tbo.get(), ERT_START_DPU);
654+
655+
xrt_core::cuidx_type cu_idx{0};
656+
ebuf.set_cu_idx(cu_idx);
657+
658+
if (m_bo_array[IO_TEST_BO_INPUT].tbo.get()) {
659+
ebuf.add_arg_64(3);
660+
ebuf.add_arg_bo(*m_bo_array[IO_TEST_BO_INPUT].tbo.get(), "0");
661+
ebuf.add_arg_bo(*m_bo_array[IO_TEST_BO_PARAMETERS].tbo.get(), "1");
662+
ebuf.add_arg_bo(*m_bo_array[IO_TEST_BO_OUTPUT].tbo.get(), "2");
663+
}
664+
665+
if (dump)
666+
ebuf.dump();
667+
668+
ebuf.add_ctrl_bo(*m_bo_array[IO_TEST_BO_INSTRUCTION].tbo.get());
669+
ebuf.patch_ctrl_code(*m_bo_array[IO_TEST_BO_INSTRUCTION].tbo.get(),
670+
patcher::buf_type::ctrltext, m_elf, m_kernel_index);
671+
}
672+
608673
void
609674
elf_preempt_io_test_bo_set::
610675
init_cmd(hw_ctx& hwctx, bool dump)
@@ -883,6 +948,32 @@ get_preemption_checkpoints()
883948
return 0;
884949
}
885950

951+
void
952+
io_test_bo_set_base::
953+
cache_cmd_header()
954+
{
955+
auto cbo = m_bo_array[IO_TEST_BO_CMD].tbo.get();
956+
if (!cbo)
957+
return;
958+
auto pkt = reinterpret_cast<ert_packet *>(cbo->map());
959+
if (!m_cached_cmd_header)
960+
m_cached_cmd_header = pkt->header;
961+
}
962+
963+
void
964+
io_test_bo_set_base::
965+
restore_cmd_header()
966+
{
967+
auto cbo = m_bo_array[IO_TEST_BO_CMD].tbo.get();
968+
if (!cbo || !m_cached_cmd_header)
969+
return;
970+
971+
auto pkt = reinterpret_cast<ert_packet *>(cbo->map());
972+
pkt->header = m_cached_cmd_header;
973+
pkt->state = ERT_CMD_STATE_NEW;
974+
std::atomic_thread_fence(std::memory_order_seq_cst);
975+
}
976+
886977
unsigned long
887978
elf_preempt_io_test_bo_set::
888979
get_preemption_checkpoints()

test/shim_test/io.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ class io_test_bo_set_base
7070
virtual void
7171
verify_result();
7272

73+
void
74+
cache_cmd_header();
75+
76+
void
77+
restore_cmd_header();
78+
7379
static const char *
7480
bo_type2name(int type);
7581

@@ -90,6 +96,7 @@ class io_test_bo_set_base
9096
const int m_FLAG_OPT = 1 << 1;
9197
const int m_FLAG_NO_FILL = 1 << 2;
9298
const int m_FLAG_DEV_BUF = 1 << 3;
99+
uint32_t m_cached_cmd_header = 0;
93100

94101
void
95102
create_data_bo_from_file(io_test_bo& ibo, const std::string filename, int flags);
@@ -124,6 +131,15 @@ class elf_io_test_bo_set : public io_test_bo_set_base
124131
init_cmd(hw_ctx& hwctx, bool dump) override;
125132
};
126133

134+
class elf_full_io_test_bo_set : public io_test_bo_set_base
135+
{
136+
public:
137+
elf_full_io_test_bo_set(device *dev, const std::string& xclbin_name);
138+
139+
void
140+
init_cmd(hw_ctx& hwctx, bool dump) override;
141+
};
142+
127143
class elf_preempt_io_test_bo_set : public io_test_bo_set_base
128144
{
129145
public:

test/shim_test/io_test.cpp

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ alloc_and_init_bo_set(device* dev, const char *xclbin)
4343
base = std::make_unique<elf_io_test_bo_set>(dev, std::string(xclbin));
4444
break;
4545
case KERNEL_TYPE_TXN_PREEMPT:
46+
case KERNEL_TYPE_TXN_FULL_ELF:
47+
base = std::make_unique<elf_full_io_test_bo_set>(dev, xclbin ? std::string(xclbin) : get_xclbin_name(dev));
48+
break;
4649
case KERNEL_TYPE_TXN_FULL_ELF_PREEMPT:
4750
base = std::make_unique<elf_preempt_io_test_bo_set>(dev, std::string(xclbin));
4851
break;
@@ -113,16 +116,28 @@ void
113116
io_test_cmd_submit_and_wait_latency(
114117
hwqueue_handle *hwq,
115118
int total_cmd_submission,
116-
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos
119+
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos,
120+
std::vector< std::unique_ptr<io_test_bo_set_base> >* bo_set_ptr = nullptr
117121
)
118122
{
119123
int completed = 0;
120-
int wait_idx = 0;
121124

122125
while (completed < total_cmd_submission) {
123-
for (auto& cmd : cmdlist_bos) {
126+
for (size_t i = 0; i < cmdlist_bos.size(); i++) {
127+
auto& cmd = cmdlist_bos[i];
128+
129+
// For UMQ, restore header before resubmission
130+
if (completed > 0 && bo_set_ptr && i < bo_set_ptr->size()) {
131+
(*bo_set_ptr)[i]->restore_cmd_header();
132+
}
133+
124134
hwq->submit_command(std::get<0>(cmd).get()->get());
125135
io_test_cmd_wait(hwq, std::get<0>(cmd));
136+
137+
if (bo_set_ptr && i < bo_set_ptr->size()) {
138+
std::atomic_thread_fence(std::memory_order_acquire);
139+
}
140+
126141
auto state = std::get<1>(cmd)->state;
127142
if (state != ERT_CMD_STATE_COMPLETED) {
128143
std::string errmsg = "Command ";
@@ -135,7 +150,9 @@ io_test_cmd_submit_and_wait_latency(
135150
completed++;
136151
if (completed >= total_cmd_submission)
137152
break;
138-
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
153+
154+
if (!bo_set_ptr)
155+
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
139156
}
140157
}
141158
}
@@ -144,15 +161,18 @@ void
144161
io_test_cmd_submit_and_wait_thruput(
145162
hwqueue_handle *hwq,
146163
int total_cmd_submission,
147-
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos
164+
std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos,
165+
std::vector< std::unique_ptr<io_test_bo_set_base> >* bo_set_ptr = nullptr
148166
)
149167
{
150168
int issued = 0;
151169
int completed = 0;
152170
int wait_idx = 0;
153171

154172
for (auto& cmd : cmdlist_bos) {
155-
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
173+
if (!bo_set_ptr)
174+
std::get<1>(cmd)->state = ERT_CMD_STATE_NEW;
175+
156176
hwq->submit_command(std::get<0>(cmd).get()->get());
157177
issued++;
158178
if (issued >= total_cmd_submission)
@@ -167,7 +187,12 @@ io_test_cmd_submit_and_wait_thruput(
167187
completed++;
168188

169189
if (issued < total_cmd_submission) {
170-
std::get<1>(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW;
190+
// For UMQ, restore header; for KMQ, just reset state
191+
if (bo_set_ptr && wait_idx < bo_set_ptr->size())
192+
(*bo_set_ptr)[wait_idx]->restore_cmd_header();
193+
else
194+
std::get<1>(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW;
195+
171196
hwq->submit_command(std::get<0>(cmdlist_bos[wait_idx]).get()->get());
172197
issued++;
173198
}
@@ -255,6 +280,7 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist,
255280
// Initialize cmd before submission
256281
for (auto& boset : bo_set) {
257282
boset->init_cmd(hwctx, io_test_parameters.debug);
283+
boset->cache_cmd_header();
258284
boset->sync_before_run();
259285
}
260286

@@ -292,10 +318,15 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist,
292318

293319
// Submit commands and wait for results
294320
auto start = clk::now();
321+
auto device_id = device_query<query::pcie_device>(dev);
322+
bool is_umq = (device_id == npu3_device_id || device_id == npu3_device_id1);
323+
295324
if (io_test_parameters.perf == IO_TEST_THRUPUT_PERF)
296-
io_test_cmd_submit_and_wait_thruput(hwq, total_hwq_submit, cmdlist_bos);
325+
io_test_cmd_submit_and_wait_thruput(hwq, total_hwq_submit, cmdlist_bos,
326+
is_umq ? &bo_set : nullptr);
297327
else
298-
io_test_cmd_submit_and_wait_latency(hwq, total_hwq_submit, cmdlist_bos);
328+
io_test_cmd_submit_and_wait_latency(hwq, total_hwq_submit, cmdlist_bos,
329+
is_umq ? &bo_set : nullptr);
299330
auto end = clk::now();
300331

301332
// Verify preemption counters

0 commit comments

Comments
 (0)