Skip to content
21 changes: 15 additions & 6 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
${layout_declare_ubo(4, "ivec3", "out_limits")}
${layout_declare_ubo(5, "ivec4", "in_sizes")}
${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}

// Push-constant block holding the per-dispatch convolution parameters for this
// shader (output limits, input sizes, kernel geometry and output clamp range).
// NOTE(review): member order and sizes presumably have to mirror the
// push-constant list built on the host side in add_conv2d_node
// (Convolution.cpp) - confirm before reordering or resizing any member.
layout(push_constant) uniform restrict Block {
// Declared as ivec4; the bounds check visible in main() compares the output
// position against out_limits.xyz.
ivec4 out_limits;
ivec4 in_sizes;
ivec2 kernel_size;
ivec2 stride;
ivec2 padding;
ivec2 dilation;
ivec2 overlay_region;
int in_group_size;
// NOTE(review): looks like an unused filler that rounds overlay_region +
// in_group_size up to four ints so out_min starts after a full ivec4-sized
// slot - confirm against the sizes passed on the host side.
int dummy_padding;
// out_min / out_max are forwarded to op() together with the accumulated sum
// when the result is written with imageStore().
float out_min;
float out_max;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand Down Expand Up @@ -127,7 +136,7 @@ void main() {
const ivec3 out_pos = pos_shared[offset_pos_index(gl_LocalInvocationIndex)];
for (int y = 0; y < BATCH_SIZE_Y; y++) {
for (int x = 0; x < BATCH_SIZE_X; x++) {
if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits))) {
if (any(greaterThanEqual(ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), out_limits.xyz))) {
continue;
}
imageStore(t_out, ivec3(out_pos.x + x, out_pos.y + y, out_pos.z), op(sum[y][x], out_min, out_max));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,20 @@ ${layout_declare_tensor(0, "w", "t_out", DTYPE, "texture3d")}
${layout_declare_tensor(1, "r", "t_in", DTYPE, "texture3d")}
${layout_declare_tensor(2, "r", "t_kernel", DTYPE, "texture2d")}
${layout_declare_tensor(3, "r", "t_bias", DTYPE, "texture2d")}
${layout_declare_ubo(4, "ivec3", "out_limits")}
${layout_declare_ubo(5, "ivec4", "in_sizes")}
${layout_declare_ubo(6, "ivec2", "kernel_size", "ivec2", "stride", "ivec2", "padding", "ivec2", "dilation")}
${layout_declare_ubo(7, "ivec2", "overlay_region", "int", "in_group_size")}
${layout_declare_ubo(8, "float", "out_min", "float", "out_max")}

// Push-constant block holding the per-dispatch convolution parameters for this
// shader (output limits, input sizes, kernel geometry and output range values).
// NOTE(review): member order and sizes presumably have to mirror the
// push-constant list built on the host side in add_conv2d_node
// (Convolution.cpp) - confirm before reordering or resizing any member.
layout(push_constant) uniform restrict Block {
// NOTE(review): declared as ivec4; presumably only the first three components
// are meaningful as output limits - verify against the uses in main().
ivec4 out_limits;
ivec4 in_sizes;
ivec2 kernel_size;
ivec2 stride;
ivec2 padding;
ivec2 dilation;
ivec2 overlay_region;
int in_group_size;
// NOTE(review): looks like an unused filler that rounds overlay_region +
// in_group_size up to four ints so out_min starts after a full ivec4-sized
// slot - confirm against the sizes passed on the host side.
int dummy_padding;
// NOTE(review): presumably the range applied to the result before it is
// stored - the use site is not shown here; confirm in main().
float out_min;
float out_max;
};

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand Down
86 changes: 38 additions & 48 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,9 @@ void add_conv2d_node(
wg_size = {wg_size[0] * wg_size[1] * wg_size[2], 1, 1};
}

if (method == Conv2dMethod::Pointwise) {
vkapi::ParamsBindList param_buffers;
std::vector<PushConstantDataInfo> push_constants;
if (method == Conv2dMethod::Pointwise || method == Conv2dMethod::Depthwise) {
const utils::ivec4 kernel_param_size_stride = {
kernel_params.kernel_size[0],
kernel_params.kernel_size[1],
Expand All @@ -420,55 +422,43 @@ void add_conv2d_node(
kernel_params.dilation[0],
kernel_params.dilation[1]};

graph.execute_nodes().emplace_back(new DispatchNode(
graph,
shader,
wg_size,
graph.create_local_wg_size(wg_size),
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{},
// Specialization Constants
{},
// Resizing Logic
resize_conv2d_node,
{weight_data, stride, padding, dilation, transposed, output_padding},
{
graph.logical_limits_pc_of(out),
graph.sizes_pc_of(in),
PushConstantDataInfo(
&kernel_param_size_stride, sizeof(kernel_param_size_stride)),
PushConstantDataInfo(
&kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
PushConstantDataInfo(
&extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
PushConstantDataInfo(&out_params, sizeof(out_params)),
}));
push_constants = {
graph.logical_limits_pc_of(out),
graph.sizes_pc_of(in),
PushConstantDataInfo(
&kernel_param_size_stride, sizeof(kernel_param_size_stride)),
PushConstantDataInfo(
&kernel_param_pad_dial, sizeof(kernel_param_pad_dial)),
PushConstantDataInfo(
&extra_params, sizeof(extra_params), sizeof(utils::ivec4)),
PushConstantDataInfo(&out_params, sizeof(out_params)),
};
} else {
graph.execute_nodes().emplace_back(new DispatchNode(
graph,
shader,
wg_size,
graph.create_local_wg_size(wg_size),
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
{
t_out->logical_limits_ubo(),
t_in->sizes_ubo(),
graph.create_params_buffer(kernel_params),
graph.create_params_buffer(extra_params),
graph.create_params_buffer(out_params),
},
// Specialization Constants
{},
// Resizing Logic
resize_conv2d_node,
{weight_data, stride, padding, dilation, transposed, output_padding}));
param_buffers = {
t_out->logical_limits_ubo(),
t_in->sizes_ubo(),
graph.create_params_buffer(kernel_params),
graph.create_params_buffer(extra_params),
graph.create_params_buffer(out_params),
};
}

graph.execute_nodes().emplace_back(new DispatchNode(
graph,
shader,
wg_size,
graph.create_local_wg_size(wg_size),
// Inputs and Outputs
{{out, vkapi::MemoryAccessType::WRITE},
{{in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
// Shader params buffers
param_buffers,
// Specialization Constants
{},
// Resizing Logic
resize_conv2d_node,
{weight_data, stride, padding, dilation, transposed, output_padding},
push_constants));
}

void add_conv1d_node(
Expand Down