@@ -146,6 +146,31 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_dealloca(
146146 return status ;
147147}
148148
149+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill (
150+ iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
151+ const iree_hal_semaphore_list_t wait_semaphore_list ,
152+ const iree_hal_semaphore_list_t signal_semaphore_list ,
153+ iree_hal_buffer_t * target_buffer , iree_device_size_t target_offset ,
154+ iree_device_size_t length , const void * pattern ,
155+ iree_host_size_t pattern_length , iree_hal_fill_flags_t flags ) {
156+ IREE_ASSERT_ARGUMENT (device );
157+ IREE_ASSERT_ARGUMENT (
158+ !wait_semaphore_list .count ||
159+ (wait_semaphore_list .semaphores && wait_semaphore_list .payload_values ));
160+ IREE_ASSERT_ARGUMENT (!signal_semaphore_list .count ||
161+ (signal_semaphore_list .semaphores &&
162+ signal_semaphore_list .payload_values ));
163+ IREE_ASSERT_ARGUMENT (pattern );
164+ IREE_ASSERT_ARGUMENT (target_buffer );
165+ IREE_TRACE_ZONE_BEGIN (z0 );
166+ IREE_TRACE_ZONE_APPEND_VALUE_I64 (z0 , (int64_t )length );
167+ iree_status_t status = _VTABLE_DISPATCH (device , queue_fill )(
168+ device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
169+ target_buffer , target_offset , length , pattern , pattern_length , flags );
170+ IREE_TRACE_ZONE_END (z0 );
171+ return status ;
172+ }
173+
149174IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill (
150175 iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
151176 const iree_hal_semaphore_list_t wait_semaphore_list ,
@@ -196,27 +221,28 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill(
196221 return status ;
197222}
198223
199- IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill (
224+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_update (
200225 iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
201226 const iree_hal_semaphore_list_t wait_semaphore_list ,
202227 const iree_hal_semaphore_list_t signal_semaphore_list ,
228+ const void * source_buffer , iree_host_size_t source_offset ,
203229 iree_hal_buffer_t * target_buffer , iree_device_size_t target_offset ,
204- iree_device_size_t length , const void * pattern ,
205- iree_host_size_t pattern_length , iree_hal_fill_flags_t flags ) {
230+ iree_device_size_t length , iree_hal_update_flags_t flags ) {
206231 IREE_ASSERT_ARGUMENT (device );
207232 IREE_ASSERT_ARGUMENT (
208233 !wait_semaphore_list .count ||
209234 (wait_semaphore_list .semaphores && wait_semaphore_list .payload_values ));
210235 IREE_ASSERT_ARGUMENT (!signal_semaphore_list .count ||
211236 (signal_semaphore_list .semaphores &&
212237 signal_semaphore_list .payload_values ));
213- IREE_ASSERT_ARGUMENT (pattern );
238+ IREE_ASSERT_ARGUMENT (source_buffer );
214239 IREE_ASSERT_ARGUMENT (target_buffer );
215240 IREE_TRACE_ZONE_BEGIN (z0 );
216241 IREE_TRACE_ZONE_APPEND_VALUE_I64 (z0 , (int64_t )length );
217- iree_status_t status = _VTABLE_DISPATCH (device , queue_fill )(
242+ iree_status_t status = _VTABLE_DISPATCH (device , queue_update )(
218243 device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
219- target_buffer , target_offset , length , pattern , pattern_length , flags );
244+ source_buffer , source_offset , target_buffer , target_offset , length ,
245+ flags );
220246 IREE_TRACE_ZONE_END (z0 );
221247 return status ;
222248}
@@ -281,13 +307,13 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_update(
281307 return status ;
282308}
283309
284- IREE_API_EXPORT iree_status_t iree_hal_device_queue_update (
310+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy (
285311 iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
286312 const iree_hal_semaphore_list_t wait_semaphore_list ,
287313 const iree_hal_semaphore_list_t signal_semaphore_list ,
288- const void * source_buffer , iree_host_size_t source_offset ,
314+ iree_hal_buffer_t * source_buffer , iree_device_size_t source_offset ,
289315 iree_hal_buffer_t * target_buffer , iree_device_size_t target_offset ,
290- iree_device_size_t length , iree_hal_update_flags_t flags ) {
316+ iree_device_size_t length , iree_hal_copy_flags_t flags ) {
291317 IREE_ASSERT_ARGUMENT (device );
292318 IREE_ASSERT_ARGUMENT (
293319 !wait_semaphore_list .count ||
@@ -299,7 +325,7 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
299325 IREE_ASSERT_ARGUMENT (target_buffer );
300326 IREE_TRACE_ZONE_BEGIN (z0 );
301327 IREE_TRACE_ZONE_APPEND_VALUE_I64 (z0 , (int64_t )length );
302- iree_status_t status = _VTABLE_DISPATCH (device , queue_update )(
328+ iree_status_t status = _VTABLE_DISPATCH (device , queue_copy )(
303329 device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
304330 source_buffer , source_offset , target_buffer , target_offset , length ,
305331 flags );
@@ -357,76 +383,125 @@ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_copy(
357383 return status ;
358384}
359385
360- IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy (
386+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_read (
361387 iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
362388 const iree_hal_semaphore_list_t wait_semaphore_list ,
363389 const iree_hal_semaphore_list_t signal_semaphore_list ,
364- iree_hal_buffer_t * source_buffer , iree_device_size_t source_offset ,
390+ iree_hal_file_t * source_file , uint64_t source_offset ,
365391 iree_hal_buffer_t * target_buffer , iree_device_size_t target_offset ,
366- iree_device_size_t length , iree_hal_copy_flags_t flags ) {
392+ iree_device_size_t length , iree_hal_read_flags_t flags ) {
367393 IREE_ASSERT_ARGUMENT (device );
368394 IREE_ASSERT_ARGUMENT (
369395 !wait_semaphore_list .count ||
370396 (wait_semaphore_list .semaphores && wait_semaphore_list .payload_values ));
371397 IREE_ASSERT_ARGUMENT (!signal_semaphore_list .count ||
372398 (signal_semaphore_list .semaphores &&
373399 signal_semaphore_list .payload_values ));
374- IREE_ASSERT_ARGUMENT (source_buffer );
400+ IREE_ASSERT_ARGUMENT (source_file );
375401 IREE_ASSERT_ARGUMENT (target_buffer );
376402 IREE_TRACE_ZONE_BEGIN (z0 );
377- IREE_TRACE_ZONE_APPEND_VALUE_I64 (z0 , (int64_t )length );
378- iree_status_t status = _VTABLE_DISPATCH (device , queue_copy )(
403+ iree_status_t status = _VTABLE_DISPATCH (device , queue_read )(
379404 device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
380- source_buffer , source_offset , target_buffer , target_offset , length ,
381- flags );
405+ source_file , source_offset , target_buffer , target_offset , length , flags );
382406 IREE_TRACE_ZONE_END (z0 );
383407 return status ;
384408}
385409
386- IREE_API_EXPORT iree_status_t iree_hal_device_queue_read (
410+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_write (
387411 iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
388412 const iree_hal_semaphore_list_t wait_semaphore_list ,
389413 const iree_hal_semaphore_list_t signal_semaphore_list ,
390- iree_hal_file_t * source_file , uint64_t source_offset ,
391- iree_hal_buffer_t * target_buffer , iree_device_size_t target_offset ,
392- iree_device_size_t length , iree_hal_read_flags_t flags ) {
414+ iree_hal_buffer_t * source_buffer , iree_device_size_t source_offset ,
415+ iree_hal_file_t * target_file , uint64_t target_offset ,
416+ iree_device_size_t length , iree_hal_write_flags_t flags ) {
393417 IREE_ASSERT_ARGUMENT (device );
394418 IREE_ASSERT_ARGUMENT (
395419 !wait_semaphore_list .count ||
396420 (wait_semaphore_list .semaphores && wait_semaphore_list .payload_values ));
397421 IREE_ASSERT_ARGUMENT (!signal_semaphore_list .count ||
398422 (signal_semaphore_list .semaphores &&
399423 signal_semaphore_list .payload_values ));
400- IREE_ASSERT_ARGUMENT (source_file );
401- IREE_ASSERT_ARGUMENT (target_buffer );
424+ IREE_ASSERT_ARGUMENT (source_buffer );
425+ IREE_ASSERT_ARGUMENT (target_file );
402426 IREE_TRACE_ZONE_BEGIN (z0 );
403- iree_status_t status = _VTABLE_DISPATCH (device , queue_read )(
427+ iree_status_t status = _VTABLE_DISPATCH (device , queue_write )(
404428 device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
405- source_file , source_offset , target_buffer , target_offset , length , flags );
429+ source_buffer , source_offset , target_file , target_offset , length , flags );
406430 IREE_TRACE_ZONE_END (z0 );
407431 return status ;
408432}
409433
410- IREE_API_EXPORT iree_status_t iree_hal_device_queue_write (
434+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_dispatch (
411435 iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
412436 const iree_hal_semaphore_list_t wait_semaphore_list ,
413437 const iree_hal_semaphore_list_t signal_semaphore_list ,
414- iree_hal_buffer_t * source_buffer , iree_device_size_t source_offset ,
415- iree_hal_file_t * target_file , uint64_t target_offset ,
416- iree_device_size_t length , iree_hal_write_flags_t flags ) {
438+ iree_hal_executable_t * executable , int32_t entry_point ,
439+ const iree_hal_dispatch_config_t config , iree_const_byte_span_t constants ,
440+ const iree_hal_buffer_ref_list_t bindings ,
441+ iree_hal_dispatch_flags_t flags ) {
417442 IREE_ASSERT_ARGUMENT (device );
418443 IREE_ASSERT_ARGUMENT (
419444 !wait_semaphore_list .count ||
420445 (wait_semaphore_list .semaphores && wait_semaphore_list .payload_values ));
421446 IREE_ASSERT_ARGUMENT (!signal_semaphore_list .count ||
422447 (signal_semaphore_list .semaphores &&
423448 signal_semaphore_list .payload_values ));
424- IREE_ASSERT_ARGUMENT (source_buffer );
425- IREE_ASSERT_ARGUMENT (target_file );
449+ IREE_ASSERT_ARGUMENT (executable );
426450 IREE_TRACE_ZONE_BEGIN (z0 );
427- iree_status_t status = _VTABLE_DISPATCH (device , queue_write )(
451+ iree_status_t status = _VTABLE_DISPATCH (device , queue_dispatch )(
428452 device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
429- source_buffer , source_offset , target_file , target_offset , length , flags );
453+ executable , entry_point , config , constants , bindings , flags );
454+ IREE_TRACE_ZONE_END (z0 );
455+ return status ;
456+ }
457+
458+ IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_dispatch (
459+ iree_hal_device_t * device , iree_hal_queue_affinity_t queue_affinity ,
460+ const iree_hal_semaphore_list_t wait_semaphore_list ,
461+ const iree_hal_semaphore_list_t signal_semaphore_list ,
462+ iree_hal_executable_t * executable , int32_t entry_point ,
463+ const iree_hal_dispatch_config_t config , iree_const_byte_span_t constants ,
464+ const iree_hal_buffer_ref_list_t bindings ,
465+ iree_hal_dispatch_flags_t flags ) {
466+ IREE_ASSERT_ARGUMENT (device );
467+ IREE_ASSERT_ARGUMENT (executable );
468+ IREE_TRACE_ZONE_BEGIN (z0 );
469+
470+ // If we are starting execution immediately then we can reduce latency by
471+ // allowing inline command buffer execution.
472+ iree_hal_command_buffer_mode_t command_buffer_mode =
473+ IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT ;
474+ if (wait_semaphore_list .count == 0 ) {
475+ command_buffer_mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION ;
476+ }
477+
478+ iree_hal_command_buffer_t * command_buffer = NULL ;
479+ IREE_RETURN_AND_END_ZONE_IF_ERROR (
480+ z0 , iree_hal_command_buffer_create (
481+ device , command_buffer_mode , IREE_HAL_COMMAND_CATEGORY_DISPATCH ,
482+ queue_affinity , /*binding_capacity=*/ 0 , & command_buffer ));
483+
484+ iree_status_t status = iree_hal_command_buffer_begin (command_buffer );
485+
486+ if (iree_status_is_ok (status )) {
487+ status = iree_hal_command_buffer_dispatch (command_buffer , executable ,
488+ entry_point , config , constants ,
489+ bindings , flags );
490+ }
491+
492+ if (iree_status_is_ok (status )) {
493+ status = iree_hal_command_buffer_end (command_buffer );
494+ }
495+
496+ if (iree_status_is_ok (status )) {
497+ status = iree_hal_device_queue_execute (
498+ device , queue_affinity , wait_semaphore_list , signal_semaphore_list ,
499+ command_buffer , iree_hal_buffer_binding_table_empty (),
500+ IREE_HAL_EXECUTE_FLAG_NONE );
501+ }
502+
503+ iree_hal_command_buffer_release (command_buffer );
504+
430505 IREE_TRACE_ZONE_END (z0 );
431506 return status ;
432507}
0 commit comments