Vulkan conv2d: pass depthwise-convolution parameters as push constants.

Both depthwise shaders (conv2d_dw_output_tile, conv2d_dw_sned_output_tile)
drop their five UBO bindings in favour of a single push_constant block, and
add_conv2d_node now builds one DispatchNode whose param_buffers /
push_constants are filled in depending on the Conv2dMethod.

Note: the line structure of this patch was rebuilt from a
whitespace-collapsed copy. Hunk line counts match the @@ headers, but
indentation is reconstructed; use `git apply --ignore-whitespace` if the
context lines do not match the tree.

diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl
index 5a42f50e91e..be8ff15d4a8 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl
@@ -32,11 +32,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -127,7 +136,7 @@ void main() {
   const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)];
   for (int y = 0; y < BATCH_SIZE_Y; y++) {
     for (int x = 0; x < BATCH_SIZE_X; x++) {
-      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits))) {
+      if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) {
         continue;
       }
       imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max));
diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl
index bb70ee1aabb..ceadc35779e 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl
@@ -24,11 +24,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
 ${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
 ${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
 ${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
-${layout_declare_ubo(4, "ivec3", "out_limits")}
-${layout_declare_ubo(5, "ivec4", "in_sizes")}
-${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
-${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
-${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
+
+layout(push_constant) uniform restrict Block {
+  ivec4 out_limits;
+  ivec4 in_sizes;
+  ivec2 kernel_size;
+  ivec2 stride;
+  ivec2 padding;
+  ivec2 dilation;
+  ivec2 overlay_region;
+  int in_group_size;
+  int dummy_padding;
+  float out_min;
+  float out_max;
+};
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
index 3c367f334d9..71b7ce80cc0 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -407,7 +407,9 @@ void add_conv2d_node(
     wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1};
   }
 
-  if (method == Conv2dMethod::Pointwise) {
+  vkapi::ParamsBindList param_buffers;
+  std::vector<PushConstantDataInfo> push_constants;
+  if (method == Conv2dMethod::Pointwise || method == Conv2dMethod::Depthwise) {
     const utils::ivec4 kernel_param_size_stride = {
         kernel_params.kernel_size[0],
         kernel_params.kernel_size[1],
@@ -420,55 +422,43 @@ void add_conv2d_node(
         kernel_params.dilation[0],
         kernel_params.dilation[1]};
 
-    graph.execute_nodes().emplace_back(new DispatchNode(
-        graph,
-        shader,
-        wg_size,
-        graph.create_local_wg_size(wg_size),
-        // Inputs and Outputs
-        {{out, vkapi::MemoryAccessType::WRITE},
-         {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
-        // Shader params buffers
-        {},
-        // Specialization Constants
-        {},
-        // Resizing Logic
-        resize_conv2d_node,
-        {weight_data, stride, padding, dilation, transposed, output_padding},
-        {
-            graph.logical_limits_pc_of(out),
-            graph.sizes_pc_of(in),
-            PushConstantDataInfo(
-                &kernel_param_size_stride, sizeof(kernel_param_size_stride)),
-            PushConstantDataInfo(
-                &kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
-            PushConstantDataInfo(
-                &extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
-            PushConstantDataInfo(&out_params, sizeof(out_params)),
-        }));
+    push_constants = {
+        graph.logical_limits_pc_of(out),
+        graph.sizes_pc_of(in),
+        PushConstantDataInfo(
+            &kernel_param_size_stride, sizeof(kernel_param_size_stride)),
+        PushConstantDataInfo(
+            &kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
+        PushConstantDataInfo(
+            &extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
+        PushConstantDataInfo(&out_params, sizeof(out_params)),
+    };
   } else {
-    graph.execute_nodes().emplace_back(new DispatchNode(
-        graph,
-        shader,
-        wg_size,
-        graph.create_local_wg_size(wg_size),
-        // Inputs and Outputs
-        {{out, vkapi::MemoryAccessType::WRITE},
-         {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
-        // Shader params buffers
-        {
-            t_out->logical_limits_ubo(),
-            t_in->sizes_ubo(),
-            graph.create_params_buffer(kernel_params),
-            graph.create_params_buffer(extra_params),
-            graph.create_params_buffer(out_params),
-        },
-        // Specialization Constants
-        {},
-        // Resizing Logic
-        resize_conv2d_node,
-        {weight_data, stride, padding, dilation, transposed, output_padding}));
+    param_buffers = {
+        t_out->logical_limits_ubo(),
+        t_in->sizes_ubo(),
+        graph.create_params_buffer(kernel_params),
+        graph.create_params_buffer(extra_params),
+        graph.create_params_buffer(out_params),
+    };
   }
+
+  graph.execute_nodes().emplace_back(new DispatchNode(
+      graph,
+      shader,
+      wg_size,
+      graph.create_local_wg_size(wg_size),
+      // Inputs and Outputs
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
+      // Shader params buffers
+      param_buffers,
+      // Specialization Constants
+      {},
+      // Resizing Logic
+      resize_conv2d_node,
+      {weight_data, stride, padding, dilation, transposed, output_padding},
+      push_constants));
 }
 
 void add_conv1d_node(