Skip to content

Commit 3ea76df

Browse files
benvanik authored and keshavvinayak01 committed
Unifying dispatch/dispatch_indirect and adding extended configuration. (iree-org#21627)
`iree_hal_dispatch_config_t` now defines the dispatch configuration (workgroup size, workgroup count, dynamic workgroup local memory, and anything we want to add in the future). This avoids the extra vtable entry plumbed through the API for indirect dispatch and will let us add new dispatch behavior in the future without new APIs. Implementations can still switch internally based on the flags if they need to. A new behavior for custom/indirect arguments is defined but not yet implemented anywhere. This new behavior bypasses the IREE HAL ABI and allows users to pass dispatch arguments either inline in the command buffer (via the constants field) or indirectly via `bindings[0]`. This can be used to dispatch existing kernels that have their own ABI (HIP/CUDA/etc.). The indirect variant allows custom arguments to be provided per submission, as the `bindings[0]` buffer can reference a slot in the binding table. Fixes iree-org#21620. Fixes iree-org#21621. Signed-off-by: keshavvinayak01 <[email protected]>
1 parent ed96d29 commit 3ea76df

30 files changed

+656
-802
lines changed

compiler/src/iree/compiler/Dialect/HAL/IR/HALAttrs.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def HAL_BufferUsage_None : I32BitEnumAttrCase<"None",
7777
def HAL_BufferUsage_TransferSource : I32BitEnumAttrCase<"TransferSource", 0x00000001>;
7878
def HAL_BufferUsage_TransferTarget : I32BitEnumAttrCase<"TransferTarget", 0x00000002>;
7979
def HAL_BufferUsage_Transfer : I32BitEnumAttrCase<"Transfer", 0x00000003>;
80-
def HAL_BufferUsage_DispatchIndirectParams : I32BitEnumAttrCase<"DispatchIndirectParams", 0x00000100>;
80+
def HAL_BufferUsage_DispatchIndirectParameters : I32BitEnumAttrCase<"DispatchIndirectParameters", 0x00000100>;
8181
def HAL_BufferUsage_DispatchUniformRead : I32BitEnumAttrCase<"DispatchUniformRead", 0x00000200>;
8282
def HAL_BufferUsage_DispatchStorageRead : I32BitEnumAttrCase<"DispatchStorageRead", 0x00000400>;
8383
def HAL_BufferUsage_DispatchStorageWrite : I32BitEnumAttrCase<"DispatchStorageWrite", 0x00000800>;
@@ -101,7 +101,7 @@ def HAL_BufferUsageBitfieldAttr :
101101
HAL_BufferUsage_TransferSource,
102102
HAL_BufferUsage_TransferTarget,
103103
HAL_BufferUsage_Transfer,
104-
HAL_BufferUsage_DispatchIndirectParams,
104+
HAL_BufferUsage_DispatchIndirectParameters,
105105
HAL_BufferUsage_DispatchUniformRead,
106106
HAL_BufferUsage_DispatchStorageRead,
107107
HAL_BufferUsage_DispatchStorageWrite,

experimental/webgpu/command_buffer.c

Lines changed: 25 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -806,123 +806,6 @@ static iree_status_t iree_hal_webgpu_command_buffer_push_descriptor_set(
806806
return iree_ok_status();
807807
}
808808

809-
static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch(
810-
iree_hal_webgpu_command_buffer_t* command_buffer,
811-
iree_hal_executable_t* executable, uint32_t ordinal,
812-
iree_const_byte_span_t constants, iree_hal_buffer_ref_list_t bindings,
813-
iree_hal_dispatch_flags_t flags, WGPUComputePassEncoder* out_compute_pass) {
814-
const iree_hal_webgpu_entry_point_t* entry_point =
815-
iree_hal_webgpu_executable_lookup_entry_point(executable, ordinal);
816-
817-
// Upload push constant data - this may incur a segment flush if the staging
818-
// buffer is exhausted.
819-
iree_host_size_t constant_count =
820-
iree_hal_webgpu_pipeline_layout_constant_count(entry_point->layout);
821-
iree_const_byte_span_t constant_data = iree_make_const_byte_span(
822-
command_buffer->state.constants,
823-
constant_count * sizeof(command_buffer->state.constants[0]));
824-
uint32_t params_offset = 0;
825-
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_append_parameters(
826-
command_buffer, constant_data, &params_offset));
827-
828-
// Acquire the compute pass we'll encode the dispatch into - this may be
829-
// fresh or reused from prior commands.
830-
WGPUComputePassEncoder compute_pass = NULL;
831-
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_acquire_compute_pass(
832-
command_buffer, &compute_pass));
833-
wgpuComputePassEncoderSetPipeline(compute_pass, entry_point->pipeline);
834-
835-
if (constant_count > 0) {
836-
// Bind the push constant emulation bind group at the staging buffer
837-
// relative offset for this dispatch.
838-
wgpuComputePassEncoderSetBindGroup(
839-
compute_pass, IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX,
840-
command_buffer->staging_buffer->bind_group, 1, &params_offset);
841-
}
842-
843-
// Set all bindings.
844-
const iree_hal_webgpu_set_binding_info_t* binding_info =
845-
iree_hal_webgpu_pipeline_layout_set_binding_info(entry_point->layout);
846-
for (iree_host_size_t i = 0; i < binding_info->set_count; ++i) {
847-
// If there are no bindings in this set we can skip it.
848-
if (binding_info->set_masks[i] == 0) continue;
849-
850-
// If there is a bind group handle then it means we've done the lookup and
851-
// set the bind group on the device already - we can skip.
852-
if (command_buffer->state.bind_groups[i].handle) continue;
853-
854-
// Acquire the bind group to use for the current descriptor set.
855-
WGPUBindGroup handle = iree_hal_webgpu_bind_group_cache_acquire(
856-
command_buffer->bind_group_cache, binding_info->set_layouts[i],
857-
command_buffer->state.bind_groups[i].bindings,
858-
binding_info->set_masks[i]);
859-
860-
// NOTE: today we don't support dynamic offsets for push descriptor sets.
861-
// This will be a larger change we'll need to handle in the compiler. If we
862-
// wanted to improve caching we could make all the bindings dynamic and then
863-
// always cache the base offsets, however
864-
// maxDynamicStorageBuffersPerPipelineLayout is minimally 4 and that's not
865-
// a lot of bindings.
866-
wgpuComputePassEncoderSetBindGroup(compute_pass, (uint32_t)i, handle, 0,
867-
NULL);
868-
command_buffer->state.bind_groups[i].handle = handle;
869-
command_buffer->state.bind_groups_empty &= ~(1ull << i);
870-
}
871-
872-
if (constant_count > 0) {
873-
// Pad up to IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX with empty bind groups.
874-
WGPUBindGroup empty_handle =
875-
command_buffer->staging_buffer->empty_bind_group;
876-
for (iree_host_size_t i = binding_info->set_count;
877-
i < IREE_HAL_WEBGPU_PARAMS_BIND_GROUP_INDEX; ++i) {
878-
// Skip if an empty group is already set at this index.
879-
if ((command_buffer->state.bind_groups_empty >> i) & 1ull) continue;
880-
881-
wgpuComputePassEncoderSetBindGroup(compute_pass, (uint32_t)i,
882-
empty_handle, 0, NULL);
883-
command_buffer->state.bind_groups[i].handle = empty_handle;
884-
command_buffer->state.bind_groups_empty |= 1ull << i;
885-
}
886-
}
887-
888-
*out_compute_pass = compute_pass;
889-
return iree_ok_status();
890-
}
891-
892-
static iree_status_t iree_hal_webgpu_command_buffer_dispatch(
893-
iree_hal_command_buffer_t* base_command_buffer,
894-
iree_hal_executable_t* executable, int32_t entry_point,
895-
uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z,
896-
iree_hal_dispatch_flags_t flags) {
897-
iree_hal_webgpu_command_buffer_t* command_buffer =
898-
iree_hal_webgpu_command_buffer_cast(base_command_buffer);
899-
900-
WGPUComputePassEncoder compute_pass = NULL;
901-
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_prepare_dispatch(
902-
command_buffer, executable, entry_point, &compute_pass));
903-
wgpuComputePassEncoderDispatchWorkgroups(compute_pass, workgroup_x,
904-
workgroup_y, workgroup_z);
905-
906-
return iree_ok_status();
907-
}
908-
909-
static iree_status_t iree_hal_webgpu_command_buffer_dispatch_indirect(
910-
iree_hal_command_buffer_t* base_command_buffer,
911-
iree_hal_executable_t* executable, int32_t entry_point,
912-
iree_hal_buffer_ref_t workgroups_ref, iree_hal_dispatch_flags_t flags) {
913-
iree_hal_webgpu_command_buffer_t* command_buffer =
914-
iree_hal_webgpu_command_buffer_cast(base_command_buffer);
915-
916-
WGPUComputePassEncoder compute_pass = NULL;
917-
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_prepare_dispatch(
918-
command_buffer, executable, entry_point, &compute_pass));
919-
wgpuComputePassEncoderDispatchWorkgroupsIndirect(
920-
compute_pass, iree_hal_webgpu_buffer_handle(workgroups_ref.buffer),
921-
workgroups_ref.offset);
922-
923-
return iree_ok_status();
924-
}
925-
926809
static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch(
927810
iree_hal_webgpu_command_buffer_t* command_buffer,
928811
iree_hal_executable_t* executable, uint32_t ordinal,
@@ -968,15 +851,16 @@ static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch(
968851
binding_mask |= 1u << i;
969852
group_bindings[i].type = WGPUBufferBindingType_Storage;
970853
group_bindings[i].buffer =
971-
bindings[i].buffer ? iree_hal_webgpu_buffer_handle(bindings[i].buffer)
972-
: NULL;
973-
group_bindings[i] offset = bindings[i].offset;
974-
group_bindings[i] length = bindings[i].length;
854+
bindings.values[i].buffer
855+
? iree_hal_webgpu_buffer_handle(bindings.values[i].buffer)
856+
: NULL;
857+
group_bindings[i].offset = bindings.values[i].offset;
858+
group_bindings[i].length = bindings.values[i].length;
975859
}
976860

977861
// Acquire the bind group to use for the current descriptor set.
978862
WGPUBindGroup handle = iree_hal_webgpu_bind_group_cache_acquire(
979-
command_buffer->bind_group_cache, binding_info->set_layout,
863+
command_buffer->bind_group_cache, binding_info->set_layouts[0],
980864
group_bindings, binding_mask);
981865

982866
// NOTE: today we don't support dynamic offsets for push descriptor sets.
@@ -994,36 +878,33 @@ static iree_status_t iree_hal_webgpu_command_buffer_prepare_dispatch(
994878
static iree_status_t iree_hal_webgpu_command_buffer_dispatch(
995879
iree_hal_command_buffer_t* base_command_buffer,
996880
iree_hal_executable_t* executable, int32_t entry_point,
997-
const uint32_t workgroup_count[3], iree_const_byte_span_t constants,
998-
iree_hal_buffer_ref_list_t bindings, iree_hal_dispatch_flags_t flags) {
881+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
882+
const iree_hal_buffer_ref_list_t bindings,
883+
iree_hal_dispatch_flags_t flags) {
999884
iree_hal_webgpu_command_buffer_t* command_buffer =
1000885
iree_hal_webgpu_command_buffer_cast(base_command_buffer);
1001886

1002-
WGPUComputePassEncoder compute_pass = NULL;
1003-
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_prepare_dispatch(
1004-
command_buffer, executable, entry_point, constants, bindings, flags,
1005-
&compute_pass));
1006-
wgpuComputePassEncoderDispatchWorkgroups(
1007-
compute_pass, workgroup_count[0], workgroup_count[1], workgroup_count[2]);
1008-
1009-
return iree_ok_status();
1010-
}
1011-
1012-
static iree_status_t iree_hal_webgpu_command_buffer_dispatch_indirect(
1013-
iree_hal_command_buffer_t* base_command_buffer,
1014-
iree_hal_executable_t* executable, int32_t entry_point,
1015-
iree_hal_buffer_ref_t workgroups_ref, iree_const_byte_span_t constants,
1016-
iree_hal_buffer_ref_list_t bindings, iree_hal_dispatch_flags_t flags) {
1017-
iree_hal_webgpu_command_buffer_t* command_buffer =
1018-
iree_hal_webgpu_command_buffer_cast(base_command_buffer);
887+
if (iree_hal_dispatch_uses_custom_arguments(flags)) {
888+
return iree_make_status(
889+
IREE_STATUS_UNIMPLEMENTED,
890+
"direct/indirect arguments are not supported in WebGPU");
891+
}
1019892

1020893
WGPUComputePassEncoder compute_pass = NULL;
1021894
IREE_RETURN_IF_ERROR(iree_hal_webgpu_command_buffer_prepare_dispatch(
1022895
command_buffer, executable, entry_point, constants, bindings, flags,
1023896
&compute_pass));
1024-
wgpuComputePassEncoderDispatchWorkgroupsIndirect(
1025-
compute_pass, iree_hal_webgpu_buffer_handle(workgroups_ref.buffer),
1026-
workgroups_ref.offset);
897+
898+
if (iree_hal_dispatch_uses_indirect_parameters(flags)) {
899+
wgpuComputePassEncoderDispatchWorkgroupsIndirect(
900+
compute_pass,
901+
iree_hal_webgpu_buffer_handle(config.workgroup_count_ref.buffer),
902+
config.workgroup_count_ref.offset);
903+
} else {
904+
wgpuComputePassEncoderDispatchWorkgroups(
905+
compute_pass, config.workgroup_count[0], config.workgroup_count[1],
906+
config.workgroup_count[2]);
907+
}
1027908

1028909
return iree_ok_status();
1029910
}
@@ -1045,7 +926,4 @@ const iree_hal_command_buffer_vtable_t iree_hal_webgpu_command_buffer_vtable = {
1045926
.constants = iree_hal_webgpu_command_buffer_constants,
1046927
.push_descriptor_set = iree_hal_webgpu_command_buffer_push_descriptor_set,
1047928
.dispatch = iree_hal_webgpu_command_buffer_dispatch,
1048-
.dispatch_indirect = iree_hal_webgpu_command_buffer_dispatch_indirect,
1049-
.dispatch = iree_hal_webgpu_command_buffer_dispatch,
1050-
.dispatch_indirect = iree_hal_webgpu_command_buffer_dispatch_indirect,
1051929
};

experimental/webgpu/simple_allocator.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ static iree_status_t iree_hal_webgpu_simple_allocator_allocate_buffer(
176176
usage_flags |= WGPUBufferUsage_Uniform;
177177
}
178178
if (iree_any_bit_set(params->usage,
179-
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS)) {
179+
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMETERS)) {
180180
usage_flags |= WGPUBufferUsage_Indirect;
181181
}
182182

integrations/pjrt/test/triage_jaxtest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def check_buffer_usage(errortxt, _, __):
328328
return (
329329
"requested buffer usage is not supported" in errortxt
330330
or "tensor requested usage was not specified when the buffer" in errortxt
331-
or "PERMISSION_DENIED; requested usage was not specified when the buffer was allocated; buffer allows DISPATCH_INDIRECT_PARAMS"
331+
or "PERMISSION_DENIED; requested usage was not specified when the buffer was allocated; buffer allows DISPATCH_INDIRECT_PARAMETERS"
332332
in errortxt
333333
)
334334

runtime/bindings/python/hal.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,8 +1286,8 @@ void SetupHalBindings(nanobind::module_ m) {
12861286
.value("TRANSFER_SOURCE", IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE)
12871287
.value("TRANSFER_TARGET", IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET)
12881288
.value("TRANSFER", IREE_HAL_BUFFER_USAGE_TRANSFER)
1289-
.value("DISPATCH_INDIRECT_PARAMS",
1290-
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS)
1289+
.value("DISPATCH_INDIRECT_PARAMETERS",
1290+
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMETERS)
12911291
.value("DISPATCH_UNIFORM_READ",
12921292
IREE_HAL_BUFFER_USAGE_DISPATCH_UNIFORM_READ)
12931293
.value("DISPATCH_STORAGE_READ",

runtime/bindings/python/iree/runtime/_binding.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class BufferUsage(int):
4242
DISPATCH_IMAGE: ClassVar[BufferUsage] = ...
4343
DISPATCH_IMAGE_READ: ClassVar[BufferUsage] = ...
4444
DISPATCH_IMAGE_WRITE: ClassVar[BufferUsage] = ...
45-
DISPATCH_INDIRECT_PARAMS: ClassVar[BufferUsage] = ...
45+
DISPATCH_INDIRECT_PARAMETERS: ClassVar[BufferUsage] = ...
4646
DISPATCH_STORAGE: ClassVar[BufferUsage] = ...
4747
DISPATCH_STORAGE_READ: ClassVar[BufferUsage] = ...
4848
DISPATCH_STORAGE_WRITE: ClassVar[BufferUsage] = ...

runtime/src/iree/hal/buffer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ static const iree_bitfield_string_mapping_t iree_hal_buffer_usage_mappings[] = {
8686
// Separate:
8787
{IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE, IREE_SVL("TRANSFER_SOURCE")},
8888
{IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET, IREE_SVL("TRANSFER_TARGET")},
89-
{IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS, IREE_SVL("DISPATCH_INDIRECT_PARAMS")},
89+
{IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMETERS, IREE_SVL("DISPATCH_INDIRECT_PARAMETERS")},
9090
{IREE_HAL_BUFFER_USAGE_DISPATCH_UNIFORM_READ, IREE_SVL("DISPATCH_UNIFORM_READ")},
9191
{IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_READ, IREE_SVL("DISPATCH_STORAGE_READ")},
9292
{IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_WRITE, IREE_SVL("DISPATCH_STORAGE_WRITE")},

runtime/src/iree/hal/buffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ enum iree_hal_buffer_usage_bits_t {
230230
// - GPUBufferUsage.INDIRECT
231231
// - MTLResourceUsageRead
232232
// - VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT
233-
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS = 1u << 8,
233+
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMETERS = 1u << 8,
234234

235235
// Buffer contents are uniformly read by dispatches.
236236
// These may occasionally be written as storage buffers in cases of
@@ -309,7 +309,7 @@ enum iree_hal_buffer_usage_bits_t {
309309

310310
// Buffer contents are available for use by all dispatch-related operations.
311311
IREE_HAL_BUFFER_USAGE_DISPATCH =
312-
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS |
312+
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMETERS |
313313
IREE_HAL_BUFFER_USAGE_DISPATCH_UNIFORM_READ |
314314
IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
315315
IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE,

runtime/src/iree/hal/command_buffer.c

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -519,12 +519,17 @@ IREE_API_EXPORT iree_status_t iree_hal_command_buffer_collective(
519519
IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch(
520520
iree_hal_command_buffer_t* command_buffer,
521521
iree_hal_executable_t* executable, int32_t entry_point,
522-
const uint32_t workgroup_count[3], iree_const_byte_span_t constants,
523-
iree_hal_buffer_ref_list_t bindings, iree_hal_dispatch_flags_t flags) {
522+
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
523+
const iree_hal_buffer_ref_list_t bindings,
524+
iree_hal_dispatch_flags_t flags) {
524525
IREE_ASSERT_ARGUMENT(command_buffer);
525526
IREE_ASSERT_ARGUMENT(executable);
526527

527-
if ((workgroup_count[0] | workgroup_count[1] | workgroup_count[2]) == 0) {
528+
const bool has_static_workgroup_count =
529+
!iree_hal_dispatch_uses_indirect_parameters(flags);
530+
if (has_static_workgroup_count &&
531+
(config.workgroup_count[0] | config.workgroup_count[1] |
532+
config.workgroup_count[2]) == 0) {
528533
// No-op dispatch. All implementations are expected to do this but we ensure
529534
// it happens here to avoid the overhead of going all the way down into the
530535
// device layer for something we know should have no (intentional)
@@ -543,50 +548,34 @@ IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch(
543548
// slice off a much larger allocation and then suballocate from that ourselves
544549
// so that we could avoid the tracy_malloc overheads per-dispatch.
545550
IREE_TRACE({
546-
char xyz_string[32];
547-
int xyz_string_length =
548-
snprintf(xyz_string, IREE_ARRAYSIZE(xyz_string), "%ux%ux%u",
549-
workgroup_count[0], workgroup_count[1], workgroup_count[2]);
550-
IREE_TRACE_ZONE_APPEND_TEXT(z0, xyz_string, xyz_string_length);
551+
if (has_static_workgroup_count) {
552+
char xyz_string[32];
553+
int xyz_string_length =
554+
snprintf(xyz_string, IREE_ARRAYSIZE(xyz_string), "%ux%ux%u",
555+
config.workgroup_count[0], config.workgroup_count[1],
556+
config.workgroup_count[2]);
557+
IREE_TRACE_ZONE_APPEND_TEXT(z0, xyz_string, xyz_string_length);
558+
} else {
559+
IREE_TRACE_ZONE_APPEND_TEXT(z0, "(indirect)");
560+
}
551561
});
552562
#endif // IREE_HAL_VERBOSE_TRACING_ENABLE
553563

554564
IF_VALIDATING(command_buffer, {
555565
IREE_RETURN_AND_END_ZONE_IF_ERROR(
556566
z0, iree_hal_command_buffer_dispatch_validation(
557567
command_buffer, VALIDATION_STATE(command_buffer), executable,
558-
entry_point, workgroup_count, constants, bindings, flags));
568+
entry_point, config, constants, bindings, flags));
559569
});
560570

561571
iree_status_t status = _VTABLE_DISPATCH(command_buffer, dispatch)(
562-
command_buffer, executable, entry_point, workgroup_count, constants,
563-
bindings, flags);
572+
command_buffer, executable, entry_point, config, constants, bindings,
573+
flags);
564574

565575
IREE_TRACE_ZONE_END(z0);
566576
return status;
567577
}
568578

569-
IREE_API_EXPORT iree_status_t iree_hal_command_buffer_dispatch_indirect(
570-
iree_hal_command_buffer_t* command_buffer,
571-
iree_hal_executable_t* executable, int32_t entry_point,
572-
iree_hal_buffer_ref_t workgroups_ref, iree_const_byte_span_t constants,
573-
iree_hal_buffer_ref_list_t bindings, iree_hal_dispatch_flags_t flags) {
574-
IREE_ASSERT_ARGUMENT(command_buffer);
575-
IREE_ASSERT_ARGUMENT(executable);
576-
IREE_TRACE_ZONE_BEGIN(z0);
577-
IF_VALIDATING(command_buffer, {
578-
IREE_RETURN_AND_END_ZONE_IF_ERROR(
579-
z0, iree_hal_command_buffer_dispatch_indirect_validation(
580-
command_buffer, VALIDATION_STATE(command_buffer), executable,
581-
entry_point, workgroups_ref, constants, bindings, flags));
582-
});
583-
iree_status_t status = _VTABLE_DISPATCH(command_buffer, dispatch_indirect)(
584-
command_buffer, executable, entry_point, workgroups_ref, constants,
585-
bindings, flags);
586-
IREE_TRACE_ZONE_END(z0);
587-
return status;
588-
}
589-
590579
//===----------------------------------------------------------------------===//
591580
// Validation support
592581
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)