Skip to content

Commit c73d46d

Browse files
committed
[ET-VK] Using push constants for conv2d dw.
This diff switches the depthwise (dw) conv2d shaders in ExecuTorch's Vulkan backend from uniform buffer objects (UBOs) to push constants for passing shader parameters. This optimization improves memory usage. Differential Revision: [D68493849](https://our.internmc.facebook.com/intern/diff/D68493849/) ghstack-source-id: 262823002 Pull Request resolved: #7928
1 parent 95d3072 commit c73d46d

File tree

3 files changed

+67
-59
lines changed

3 files changed

+67
-59
lines changed

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
3030
${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
3131
${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
3232
${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
33-
${layout_declare_ubo(4, "ivec3", "out_limits")}
34-
${layout_declare_ubo(5, "ivec4", "in_sizes")}
35-
${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
36-
${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
37-
${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
33+
34+
layout(push_constant) uniform restrict Block {
35+
ivec4 out_limits;
36+
ivec4 in_sizes;
37+
ivec2 kernel_size;
38+
ivec2 stride;
39+
ivec2 padding;
40+
ivec2 dilation;
41+
ivec2 overlay_region;
42+
int in_group_size;
43+
int dummy_padding;
44+
float out_min;
45+
float out_max;
46+
};
3847

3948
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
4049

@@ -127,7 +136,7 @@ void main() {
127136
const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)];
128137
for (int y = 0; y < BATCH_SIZE_Y; y++) {
129138
for (int x = 0; x < BATCH_SIZE_X; x++) {
130-
if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits))) {
139+
if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) {
131140
continue;
132141
}
133142
imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max));

backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_sned_output_tile.glsl

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
2424
${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
2525
${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
2626
${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
27-
${layout_declare_ubo(4, "ivec3", "out_limits")}
28-
${layout_declare_ubo(5, "ivec4", "in_sizes")}
29-
${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
30-
${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
31-
${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}
27+
28+
layout(push_constant) uniform restrict Block {
29+
ivec4 out_limits;
30+
ivec4 in_sizes;
31+
ivec2 kernel_size;
32+
ivec2 stride;
33+
ivec2 padding;
34+
ivec2 dilation;
35+
ivec2 overlay_region;
36+
int in_group_size;
37+
int dummy_padding;
38+
float out_min;
39+
float out_max;
40+
};
3241

3342
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
3443

backends/vulkan/runtime/graph/ops/impl/Convolution.cpp

Lines changed: 38 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,9 @@ void add_conv2d_node(
407407
wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1};
408408
}
409409

410-
if (method == Conv2dMethod::Pointwise) {
410+
vkapi::ParamsBindList param_buffers;
411+
std::vector<PushConstantDataInfo> push_constants;
412+
if (method == Conv2dMethod::Pointwise || method == Conv2dMethod::Depthwise) {
411413
const utils::ivec4 kernel_param_size_stride = {
412414
kernel_params.kernel_size[0],
413415
kernel_params.kernel_size[1],
@@ -420,55 +422,43 @@ void add_conv2d_node(
420422
kernel_params.dilation[0],
421423
kernel_params.dilation[1]};
422424

423-
graph.execute_nodes().emplace_back(new DispatchNode(
424-
graph,
425-
shader,
426-
wg_size,
427-
graph.create_local_wg_size(wg_size),
428-
// Inputs and Outputs
429-
{{out, vkapi::MemoryAccessType::WRITE},
430-
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
431-
// Shader params buffers
432-
{},
433-
// Specialization Constants
434-
{},
435-
// Resizing Logic
436-
resize_conv2d_node,
437-
{weight_data, stride, padding, dilation, transposed, output_padding},
438-
{
439-
graph.logical_limits_pc_of(out),
440-
graph.sizes_pc_of(in),
441-
PushConstantDataInfo(
442-
&kernel_param_size_stride, sizeof(kernel_param_size_stride)),
443-
PushConstantDataInfo(
444-
&kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
445-
PushConstantDataInfo(
446-
&extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
447-
PushConstantDataInfo(&out_params, sizeof(out_params)),
448-
}));
425+
push_constants = {
426+
graph.logical_limits_pc_of(out),
427+
graph.sizes_pc_of(in),
428+
PushConstantDataInfo(
429+
&kernel_param_size_stride, sizeof(kernel_param_size_stride)),
430+
PushConstantDataInfo(
431+
&kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
432+
PushConstantDataInfo(
433+
&extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
434+
PushConstantDataInfo(&out_params, sizeof(out_params)),
435+
};
449436
} else {
450-
graph.execute_nodes().emplace_back(new DispatchNode(
451-
graph,
452-
shader,
453-
wg_size,
454-
graph.create_local_wg_size(wg_size),
455-
// Inputs and Outputs
456-
{{out, vkapi::MemoryAccessType::WRITE},
457-
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
458-
// Shader params buffers
459-
{
460-
t_out->logical_limits_ubo(),
461-
t_in->sizes_ubo(),
462-
graph.create_params_buffer(kernel_params),
463-
graph.create_params_buffer(extra_params),
464-
graph.create_params_buffer(out_params),
465-
},
466-
// Specialization Constants
467-
{},
468-
// Resizing Logic
469-
resize_conv2d_node,
470-
{weight_data, stride, padding, dilation, transposed, output_padding}));
437+
param_buffers = {
438+
t_out->logical_limits_ubo(),
439+
t_in->sizes_ubo(),
440+
graph.create_params_buffer(kernel_params),
441+
graph.create_params_buffer(extra_params),
442+
graph.create_params_buffer(out_params),
443+
};
471444
}
445+
446+
graph.execute_nodes().emplace_back(new DispatchNode(
447+
graph,
448+
shader,
449+
wg_size,
450+
graph.create_local_wg_size(wg_size),
451+
// Inputs and Outputs
452+
{{out, vkapi::MemoryAccessType::WRITE},
453+
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
454+
// Shader params buffers
455+
std::move(param_buffers),
456+
// Specialization Constants
457+
{},
458+
// Resizing Logic
459+
resize_conv2d_node,
460+
{weight_data, stride, padding, dilation, transposed, output_padding},
461+
std::move(push_constants)));
472462
}
473463

474464
void add_conv1d_node(

0 commit comments

Comments
 (0)