@@ -43,6 +43,9 @@ alloc_and_init_bo_set(device* dev, const char *xclbin)
4343 base = std::make_unique<elf_io_test_bo_set>(dev, std::string (xclbin));
4444 break ;
4545 case KERNEL_TYPE_TXN_PREEMPT:
46+ case KERNEL_TYPE_TXN_FULL_ELF:
47+ base = std::make_unique<elf_full_io_test_bo_set>(dev, xclbin ? std::string (xclbin) : get_xclbin_name (dev));
48+ break ;
4649 case KERNEL_TYPE_TXN_FULL_ELF_PREEMPT:
4750 base = std::make_unique<elf_preempt_io_test_bo_set>(dev, std::string (xclbin));
4851 break ;
@@ -113,16 +116,25 @@ void
113116io_test_cmd_submit_and_wait_latency (
114117 hwqueue_handle *hwq,
115118 int total_cmd_submission,
116- std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos
119+ std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos,
120+ std::vector< std::unique_ptr<io_test_bo_set_base> >* bo_set_ptr = nullptr
117121 )
118122{
119123 int completed = 0 ;
120- int wait_idx = 0 ;
121124
122125 while (completed < total_cmd_submission) {
123- for (auto & cmd : cmdlist_bos) {
126+ for (size_t i = 0 ; i < cmdlist_bos.size (); i++) {
127+ auto & cmd = cmdlist_bos[i];
128+ // For UMQ, restore header before resubmission
129+ if (completed > 0 && bo_set_ptr && i < bo_set_ptr->size ()) {
130+ (*bo_set_ptr)[i]->restore_cmd_header ();
131+ }
132+
124133 hwq->submit_command (std::get<0 >(cmd).get ()->get ());
125134 io_test_cmd_wait (hwq, std::get<0 >(cmd));
135+
136+ if (bo_set_ptr && i < bo_set_ptr->size ())
137+ std::atomic_thread_fence (std::memory_order_acquire);
126138 auto state = std::get<1 >(cmd)->state ;
127139 if (state != ERT_CMD_STATE_COMPLETED) {
128140 std::string errmsg = " Command " ;
@@ -135,7 +147,8 @@ io_test_cmd_submit_and_wait_latency(
135147 completed++;
136148 if (completed >= total_cmd_submission)
137149 break ;
138- std::get<1 >(cmd)->state = ERT_CMD_STATE_NEW;
150+ if (!bo_set_ptr)
151+ std::get<1 >(cmd)->state = ERT_CMD_STATE_NEW;
139152 }
140153 }
141154}
@@ -144,15 +157,17 @@ void
144157io_test_cmd_submit_and_wait_thruput (
145158 hwqueue_handle *hwq,
146159 int total_cmd_submission,
147- std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos
160+ std::vector< std::pair<std::shared_ptr<bo>, ert_start_kernel_cmd *> >& cmdlist_bos,
161+ std::vector< std::unique_ptr<io_test_bo_set_base> >* bo_set_ptr = nullptr
148162 )
149163{
150164 int issued = 0 ;
151165 int completed = 0 ;
152166 int wait_idx = 0 ;
153167
154168 for (auto & cmd : cmdlist_bos) {
155- std::get<1 >(cmd)->state = ERT_CMD_STATE_NEW;
169+ if (!bo_set_ptr)
170+ std::get<1 >(cmd)->state = ERT_CMD_STATE_NEW;
156171 hwq->submit_command (std::get<0 >(cmd).get ()->get ());
157172 issued++;
158173 if (issued >= total_cmd_submission)
@@ -167,11 +182,15 @@ io_test_cmd_submit_and_wait_thruput(
167182 completed++;
168183
169184 if (issued < total_cmd_submission) {
170- std::get<1 >(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW;
185+ // For UMQ, restore header; for KMQ, just reset state
186+ if (bo_set_ptr && wait_idx < bo_set_ptr->size ())
187+ (*bo_set_ptr)[wait_idx]->restore_cmd_header ();
188+ else
189+ std::get<1 >(cmdlist_bos[wait_idx])->state = ERT_CMD_STATE_NEW;
190+
171191 hwq->submit_command (std::get<0 >(cmdlist_bos[wait_idx]).get ()->get ());
172192 issued++;
173193 }
174-
175194 if (++wait_idx == cmdlist_bos.size ())
176195 wait_idx = 0 ;
177196 }
@@ -255,6 +274,7 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist,
255274 // Initialize cmd before submission
256275 for (auto & boset : bo_set) {
257276 boset->init_cmd (hwctx, io_test_parameters.debug );
277+ boset->cache_cmd_header ();
258278 boset->sync_before_run ();
259279 }
260280
@@ -292,10 +312,15 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist,
292312
293313 // Submit commands and wait for results
294314 auto start = clk::now ();
315+ auto device_id = device_query<query::pcie_device>(dev);
316+ bool is_umq = (device_id == npu3_device_id || device_id == npu3_device_id1);
317+
295318 if (io_test_parameters.perf == IO_TEST_THRUPUT_PERF)
296- io_test_cmd_submit_and_wait_thruput (hwq, total_hwq_submit, cmdlist_bos);
319+ io_test_cmd_submit_and_wait_thruput (hwq, total_hwq_submit, cmdlist_bos,
320+ is_umq ? &bo_set : nullptr );
297321 else
298- io_test_cmd_submit_and_wait_latency (hwq, total_hwq_submit, cmdlist_bos);
322+ io_test_cmd_submit_and_wait_latency (hwq, total_hwq_submit, cmdlist_bos,
323+ is_umq ? &bo_set : nullptr );
299324 auto end = clk::now ();
300325
301326 // Verify preemption counters
0 commit comments