Skip to content

Commit 5a40d28

Browse files
benvanik, keshavvinayak01
authored and committed
Adding iree_hal_device_queue_dispatch. (iree-org#21630)
This is emulated today by creating a command buffer with a single dispatch and executing it. HAL implementations that can more efficiently perform a single dispatch in queue order should do so. We don't want people doing queue-level fill/copy/dispatch, but they are useful for glue code and allowing users to avoid the command buffer cost for O(1) operations is worth it. Fixes iree-org#21629. Signed-off-by: keshavvinayak01 <[email protected]>
1 parent 3ea76df commit 5a40d28

File tree

10 files changed

+171
-34
lines changed

10 files changed

+171
-34
lines changed

experimental/webgpu/webgpu_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ const iree_hal_device_vtable_t iree_hal_webgpu_device_vtable = {
470470
.queue_copy = iree_hal_device_queue_emulated_copy,
471471
.queue_read = iree_hal_webgpu_device_queue_read,
472472
.queue_write = iree_hal_webgpu_device_queue_write,
473+
.queue_dispatch = iree_hal_device_queue_emulated_dispatch,
473474
.queue_execute = iree_hal_webgpu_device_queue_execute,
474475
.queue_flush = iree_hal_webgpu_device_queue_flush,
475476
.wait_semaphores = iree_hal_webgpu_device_wait_semaphores,

runtime/src/iree/hal/device.c

Lines changed: 109 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,31 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_dealloca(
146146
return status;
147147
}
148148

149+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
150+
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
151+
const iree_hal_semaphore_list_t wait_semaphore_list,
152+
const iree_hal_semaphore_list_t signal_semaphore_list,
153+
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
154+
iree_device_size_t length, const void* pattern,
155+
iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
156+
IREE_ASSERT_ARGUMENT(device);
157+
IREE_ASSERT_ARGUMENT(
158+
!wait_semaphore_list.count ||
159+
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
160+
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
161+
(signal_semaphore_list.semaphores &&
162+
signal_semaphore_list.payload_values));
163+
IREE_ASSERT_ARGUMENT(pattern);
164+
IREE_ASSERT_ARGUMENT(target_buffer);
165+
IREE_TRACE_ZONE_BEGIN(z0);
166+
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
167+
iree_status_t status = _VTABLE_DISPATCH(device, queue_fill)(
168+
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
169+
target_buffer, target_offset, length, pattern, pattern_length, flags);
170+
IREE_TRACE_ZONE_END(z0);
171+
return status;
172+
}
173+
149174
IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill(
150175
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
151176
const iree_hal_semaphore_list_t wait_semaphore_list,
@@ -196,27 +221,28 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill(
196221
return status;
197222
}
198223

199-
IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
224+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
200225
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
201226
const iree_hal_semaphore_list_t wait_semaphore_list,
202227
const iree_hal_semaphore_list_t signal_semaphore_list,
228+
const void* source_buffer, iree_host_size_t source_offset,
203229
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
204-
iree_device_size_t length, const void* pattern,
205-
iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
230+
iree_device_size_t length, iree_hal_update_flags_t flags) {
206231
IREE_ASSERT_ARGUMENT(device);
207232
IREE_ASSERT_ARGUMENT(
208233
!wait_semaphore_list.count ||
209234
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
210235
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
211236
(signal_semaphore_list.semaphores &&
212237
signal_semaphore_list.payload_values));
213-
IREE_ASSERT_ARGUMENT(pattern);
238+
IREE_ASSERT_ARGUMENT(source_buffer);
214239
IREE_ASSERT_ARGUMENT(target_buffer);
215240
IREE_TRACE_ZONE_BEGIN(z0);
216241
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
217-
iree_status_t status = _VTABLE_DISPATCH(device, queue_fill)(
242+
iree_status_t status = _VTABLE_DISPATCH(device, queue_update)(
218243
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
219-
target_buffer, target_offset, length, pattern, pattern_length, flags);
244+
source_buffer, source_offset, target_buffer, target_offset, length,
245+
flags);
220246
IREE_TRACE_ZONE_END(z0);
221247
return status;
222248
}
@@ -281,13 +307,13 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_update(
281307
return status;
282308
}
283309

284-
IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
310+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
285311
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
286312
const iree_hal_semaphore_list_t wait_semaphore_list,
287313
const iree_hal_semaphore_list_t signal_semaphore_list,
288-
const void* source_buffer, iree_host_size_t source_offset,
314+
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
289315
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
290-
iree_device_size_t length, iree_hal_update_flags_t flags) {
316+
iree_device_size_t length, iree_hal_copy_flags_t flags) {
291317
IREE_ASSERT_ARGUMENT(device);
292318
IREE_ASSERT_ARGUMENT(
293319
!wait_semaphore_list.count ||
@@ -299,7 +325,7 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
299325
IREE_ASSERT_ARGUMENT(target_buffer);
300326
IREE_TRACE_ZONE_BEGIN(z0);
301327
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
302-
iree_status_t status = _VTABLE_DISPATCH(device, queue_update)(
328+
iree_status_t status = _VTABLE_DISPATCH(device, queue_copy)(
303329
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
304330
source_buffer, source_offset, target_buffer, target_offset, length,
305331
flags);
@@ -357,76 +383,125 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_copy(
357383
return status;
358384
}
359385

360-
IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
386+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_read(
361387
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
362388
const iree_hal_semaphore_list_t wait_semaphore_list,
363389
const iree_hal_semaphore_list_t signal_semaphore_list,
364-
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
390+
iree_hal_file_t* source_file, uint64_t source_offset,
365391
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
366-
iree_device_size_t length, iree_hal_copy_flags_t flags) {
392+
iree_device_size_t length, iree_hal_read_flags_t flags) {
367393
IREE_ASSERT_ARGUMENT(device);
368394
IREE_ASSERT_ARGUMENT(
369395
!wait_semaphore_list.count ||
370396
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
371397
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
372398
(signal_semaphore_list.semaphores &&
373399
signal_semaphore_list.payload_values));
374-
IREE_ASSERT_ARGUMENT(source_buffer);
400+
IREE_ASSERT_ARGUMENT(source_file);
375401
IREE_ASSERT_ARGUMENT(target_buffer);
376402
IREE_TRACE_ZONE_BEGIN(z0);
377-
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
378-
iree_status_t status = _VTABLE_DISPATCH(device, queue_copy)(
403+
iree_status_t status = _VTABLE_DISPATCH(device, queue_read)(
379404
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
380-
source_buffer, source_offset, target_buffer, target_offset, length,
381-
flags);
405+
source_file, source_offset, target_buffer, target_offset, length, flags);
382406
IREE_TRACE_ZONE_END(z0);
383407
return status;
384408
}
385409

386-
IREE_API_EXPORT iree_status_t iree_hal_device_queue_read(
410+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_write(
387411
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
388412
const iree_hal_semaphore_list_t wait_semaphore_list,
389413
const iree_hal_semaphore_list_t signal_semaphore_list,
390-
iree_hal_file_t* source_file, uint64_t source_offset,
391-
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
392-
iree_device_size_t length, iree_hal_read_flags_t flags) {
414+
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
415+
iree_hal_file_t* target_file, uint64_t target_offset,
416+
iree_device_size_t length, iree_hal_write_flags_t flags) {
393417
IREE_ASSERT_ARGUMENT(device);
394418
IREE_ASSERT_ARGUMENT(
395419
!wait_semaphore_list.count ||
396420
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
397421
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
398422
(signal_semaphore_list.semaphores &&
399423
signal_semaphore_list.payload_values));
400-
IREE_ASSERT_ARGUMENT(source_file);
401-
IREE_ASSERT_ARGUMENT(target_buffer);
424+
IREE_ASSERT_ARGUMENT(source_buffer);
425+
IREE_ASSERT_ARGUMENT(target_file);
402426
IREE_TRACE_ZONE_BEGIN(z0);
403-
iree_status_t status = _VTABLE_DISPATCH(device, queue_read)(
427+
iree_status_t status = _VTABLE_DISPATCH(device, queue_write)(
404428
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
405-
source_file, source_offset, target_buffer, target_offset, length, flags);
429+
source_buffer, source_offset, target_file, target_offset, length, flags);
406430
IREE_TRACE_ZONE_END(z0);
407431
return status;
408432
}
409433

410-
IREE_API_EXPORT iree_status_t iree_hal_device_queue_write(
434+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_dispatch(
411435
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
412436
const iree_hal_semaphore_list_t wait_semaphore_list,
413437
const iree_hal_semaphore_list_t signal_semaphore_list,
414-
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
415-
iree_hal_file_t* target_file, uint64_t target_offset,
416-
iree_device_size_t length, iree_hal_write_flags_t flags) {
438+
iree_hal_executable_t* executable, int32_t entry_point,
439+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
440+
const iree_hal_buffer_ref_list_t bindings,
441+
iree_hal_dispatch_flags_t flags) {
417442
IREE_ASSERT_ARGUMENT(device);
418443
IREE_ASSERT_ARGUMENT(
419444
!wait_semaphore_list.count ||
420445
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
421446
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
422447
(signal_semaphore_list.semaphores &&
423448
signal_semaphore_list.payload_values));
424-
IREE_ASSERT_ARGUMENT(source_buffer);
425-
IREE_ASSERT_ARGUMENT(target_file);
449+
IREE_ASSERT_ARGUMENT(executable);
426450
IREE_TRACE_ZONE_BEGIN(z0);
427-
iree_status_t status = _VTABLE_DISPATCH(device, queue_write)(
451+
iree_status_t status = _VTABLE_DISPATCH(device, queue_dispatch)(
428452
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
429-
source_buffer, source_offset, target_file, target_offset, length, flags);
453+
executable, entry_point, config, constants, bindings, flags);
454+
IREE_TRACE_ZONE_END(z0);
455+
return status;
456+
}
457+
458+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_dispatch(
459+
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
460+
const iree_hal_semaphore_list_t wait_semaphore_list,
461+
const iree_hal_semaphore_list_t signal_semaphore_list,
462+
iree_hal_executable_t* executable, int32_t entry_point,
463+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
464+
const iree_hal_buffer_ref_list_t bindings,
465+
iree_hal_dispatch_flags_t flags) {
466+
IREE_ASSERT_ARGUMENT(device);
467+
IREE_ASSERT_ARGUMENT(executable);
468+
IREE_TRACE_ZONE_BEGIN(z0);
469+
470+
// If we are starting execution immediately then we can reduce latency by
471+
// allowing inline command buffer execution.
472+
iree_hal_command_buffer_mode_t command_buffer_mode =
473+
IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT;
474+
if (wait_semaphore_list.count == 0) {
475+
command_buffer_mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION;
476+
}
477+
478+
iree_hal_command_buffer_t* command_buffer = NULL;
479+
IREE_RETURN_AND_END_ZONE_IF_ERROR(
480+
z0, iree_hal_command_buffer_create(
481+
device, command_buffer_mode, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
482+
queue_affinity, /*binding_capacity=*/0, &command_buffer));
483+
484+
iree_status_t status = iree_hal_command_buffer_begin(command_buffer);
485+
486+
if (iree_status_is_ok(status)) {
487+
status = iree_hal_command_buffer_dispatch(command_buffer, executable,
488+
entry_point, config, constants,
489+
bindings, flags);
490+
}
491+
492+
if (iree_status_is_ok(status)) {
493+
status = iree_hal_command_buffer_end(command_buffer);
494+
}
495+
496+
if (iree_status_is_ok(status)) {
497+
status = iree_hal_device_queue_execute(
498+
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
499+
command_buffer, iree_hal_buffer_binding_table_empty(),
500+
IREE_HAL_EXECUTE_FLAG_NONE);
501+
}
502+
503+
iree_hal_command_buffer_release(command_buffer);
504+
430505
IREE_TRACE_ZONE_END(z0);
431506
return status;
432507
}

runtime/src/iree/hal/device.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,27 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_write(
385385
iree_hal_file_t* target_file, uint64_t target_offset,
386386
iree_device_size_t length, iree_hal_write_flags_t flags);
387387

388+
// Enqueues a dispatch over a 3D grid of workgroups.
389+
// The request may execute overlapped with any other queue operations. The
390+
// executable specified must be registered for use with the device driver owning
391+
// the queue it is scheduled on.
392+
//
393+
// The provided constant data and binding list will be recorded into the queue
394+
// and need not remain live beyond the call. Binding buffers will be retained by
395+
// the queue until the operation has completed.
396+
//
397+
// All provided |bindings| must be directly specified and not reference binding
398+
// table slots.
399+
//
400+
// See iree_hal_command_buffer_dispatch for more information.
401+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_dispatch(
402+
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
403+
const iree_hal_semaphore_list_t wait_semaphore_list,
404+
const iree_hal_semaphore_list_t signal_semaphore_list,
405+
iree_hal_executable_t* executable, int32_t entry_point,
406+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
407+
const iree_hal_buffer_ref_list_t bindings, iree_hal_dispatch_flags_t flags);
408+
388409
// Executes a command buffer on a device queue.
389410
// No commands will execute until the wait fence has been reached and the signal
390411
// fence will be signaled when all commands have completed. If a command buffer
@@ -630,6 +651,15 @@ typedef struct iree_hal_device_vtable_t {
630651
iree_hal_file_t* target_file, uint64_t target_offset,
631652
iree_device_size_t length, iree_hal_write_flags_t flags);
632653

654+
iree_status_t(IREE_API_PTR* queue_dispatch)(
655+
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
656+
const iree_hal_semaphore_list_t wait_semaphore_list,
657+
const iree_hal_semaphore_list_t signal_semaphore_list,
658+
iree_hal_executable_t* executable, int32_t entry_point,
659+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
660+
const iree_hal_buffer_ref_list_t bindings,
661+
iree_hal_dispatch_flags_t flags);
662+
633663
iree_status_t(IREE_API_PTR* queue_execute)(
634664
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
635665
const iree_hal_semaphore_list_t wait_semaphore_list,
@@ -679,6 +709,14 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_copy(
679709
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
680710
iree_device_size_t length, iree_hal_copy_flags_t flags);
681711

712+
IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_dispatch(
713+
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
714+
const iree_hal_semaphore_list_t wait_semaphore_list,
715+
const iree_hal_semaphore_list_t signal_semaphore_list,
716+
iree_hal_executable_t* executable, int32_t entry_point,
717+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
718+
const iree_hal_buffer_ref_list_t bindings, iree_hal_dispatch_flags_t flags);
719+
682720
#ifdef __cplusplus
683721
} // extern "C"
684722
#endif // __cplusplus

runtime/src/iree/hal/drivers/cuda/cuda_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,7 @@ static const iree_hal_device_vtable_t iree_hal_cuda_device_vtable = {
11281128
.queue_copy = iree_hal_device_queue_emulated_copy,
11291129
.queue_read = iree_hal_cuda_device_queue_read,
11301130
.queue_write = iree_hal_cuda_device_queue_write,
1131+
.queue_dispatch = iree_hal_device_queue_emulated_dispatch,
11311132
.queue_execute = iree_hal_cuda_device_queue_execute,
11321133
.queue_flush = iree_hal_cuda_device_queue_flush,
11331134
.wait_semaphores = iree_hal_cuda_device_wait_semaphores,

runtime/src/iree/hal/drivers/hip/hip_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2693,6 +2693,7 @@ static const iree_hal_device_vtable_t iree_hal_hip_device_vtable = {
26932693
.queue_copy = iree_hal_device_queue_emulated_copy,
26942694
.queue_read = iree_hal_hip_device_queue_read,
26952695
.queue_write = iree_hal_hip_device_queue_write,
2696+
.queue_dispatch = iree_hal_device_queue_emulated_dispatch,
26962697
.queue_execute = iree_hal_hip_device_queue_execute,
26972698
.queue_flush = iree_hal_hip_device_queue_flush,
26982699
.wait_semaphores = iree_hal_hip_device_wait_semaphores,

runtime/src/iree/hal/drivers/local_sync/sync_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@ static const iree_hal_device_vtable_t iree_hal_sync_device_vtable = {
505505
.queue_copy = iree_hal_device_queue_emulated_copy,
506506
.queue_read = iree_hal_sync_device_queue_read,
507507
.queue_write = iree_hal_sync_device_queue_write,
508+
.queue_dispatch = iree_hal_device_queue_emulated_dispatch,
508509
.queue_execute = iree_hal_sync_device_queue_execute,
509510
.queue_flush = iree_hal_sync_device_queue_flush,
510511
.wait_semaphores = iree_hal_sync_device_wait_semaphores,

runtime/src/iree/hal/drivers/local_task/task_device.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ static const iree_hal_device_vtable_t iree_hal_task_device_vtable = {
539539
.queue_copy = iree_hal_device_queue_emulated_copy,
540540
.queue_read = iree_hal_task_device_queue_read,
541541
.queue_write = iree_hal_task_device_queue_write,
542+
.queue_dispatch = iree_hal_device_queue_emulated_dispatch,
542543
.queue_execute = iree_hal_task_device_queue_execute,
543544
.queue_flush = iree_hal_task_device_queue_flush,
544545
.wait_semaphores = iree_hal_task_device_wait_semaphores,

runtime/src/iree/hal/drivers/metal/metal_device.m

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,7 @@ static iree_status_t iree_hal_metal_device_profiling_end(iree_hal_device_t* base
618618
.queue_copy = iree_hal_device_queue_emulated_copy,
619619
.queue_read = iree_hal_metal_device_queue_read,
620620
.queue_write = iree_hal_metal_device_queue_write,
621+
.queue_dispatch = iree_hal_device_queue_emulated_dispatch,
621622
.queue_execute = iree_hal_metal_device_queue_execute,
622623
.queue_flush = iree_hal_metal_device_queue_flush,
623624
.wait_semaphores = iree_hal_metal_device_wait_semaphores,

runtime/src/iree/hal/drivers/null/device.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,22 @@ static iree_status_t iree_hal_null_device_queue_write(
458458
return loop_status;
459459
}
460460

461+
static iree_status_t iree_hal_null_device_queue_dispatch(
462+
iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
463+
const iree_hal_semaphore_list_t wait_semaphore_list,
464+
const iree_hal_semaphore_list_t signal_semaphore_list,
465+
iree_hal_executable_t* executable, int32_t entry_point,
466+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
467+
const iree_hal_buffer_ref_list_t bindings,
468+
iree_hal_dispatch_flags_t flags) {
469+
// TODO(null): if a native queue dispatch operation is available use that
470+
// instead. The emulated dispatch creates a command buffer and executes it and
471+
// it's best if the extra recording/upload/allocation time can be avoided.
472+
return iree_hal_device_queue_emulated_dispatch(
473+
base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
474+
executable, entry_point, config, constants, bindings, flags);
475+
}
476+
461477
static iree_status_t iree_hal_null_device_queue_execute(
462478
iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
463479
const iree_hal_semaphore_list_t wait_semaphore_list,
@@ -603,6 +619,7 @@ static const iree_hal_device_vtable_t iree_hal_null_device_vtable = {
603619
.queue_copy = iree_hal_null_device_queue_copy,
604620
.queue_read = iree_hal_null_device_queue_read,
605621
.queue_write = iree_hal_null_device_queue_write,
622+
.queue_dispatch = iree_hal_null_device_queue_dispatch,
606623
.queue_execute = iree_hal_null_device_queue_execute,
607624
.queue_flush = iree_hal_null_device_queue_flush,
608625
.wait_semaphores = iree_hal_null_device_wait_semaphores,

runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1893,6 +1893,7 @@ const iree_hal_device_vtable_t iree_hal_vulkan_device_vtable = {
18931893
/*.queue_copy=*/iree_hal_device_queue_emulated_copy,
18941894
/*.queue_read=*/iree_hal_vulkan_device_queue_read,
18951895
/*.queue_write=*/iree_hal_vulkan_device_queue_write,
1896+
/*.queue_dispatch=*/iree_hal_device_queue_emulated_dispatch,
18961897
/*.queue_execute=*/iree_hal_vulkan_device_queue_execute,
18971898
/*.queue_flush=*/iree_hal_vulkan_device_queue_flush,
18981899
/*.wait_semaphores=*/iree_hal_vulkan_device_wait_semaphores,

0 commit comments

Comments
 (0)