Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ jobs:
# Custom operator tests
PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d

nxp-build-test:
name: nxp-build-test
Expand Down
94 changes: 94 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/col2im.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#version 450 core

// Template-substituted type aliases: VEC4_T and T are produced by the
// texel_load_type / texel_load_component_type template helpers from DTYPE and
// OUTPUT_STORAGE.
#define PRECISION ${PRECISION}
#define VEC4_T ${texel_load_type(DTYPE, OUTPUT_STORAGE)}
#define T ${texel_load_component_type(DTYPE, OUTPUT_STORAGE)}

// OUTPUT_BUFFER / INPUT_BUFFER are defined only when the corresponding storage
// type is "buffer". INPUT_BUFFER selects which load_matrix_tile() overload is
// compiled further down in this file.
$if OUTPUT_STORAGE == "buffer":
#define OUTPUT_BUFFER
$if INPUT_STORAGE == "buffer":
#define INPUT_BUFFER

// Tile dimensions. TILE_M is the number of matrix rows each invocation loads in
// load_matrix_tile().
// NOTE(review): the remaining TILE_* macros are not referenced in this file;
// presumably they are consumed by the included tile headers - confirm.
#define TILE_M4 1
#define TILE_N4 1
#define TILE_K4 1

#define TILE_M 4
#define TILE_N 4
#define TILE_K 4

${define_required_extensions(DTYPE)}

layout(std430) buffer;

#include "conv2d_common.glslh"

${layout_declare_tensor(B, "w", "t_output", DTYPE, OUTPUT_STORAGE, is_scalar_array=False)}
${layout_declare_tensor(B, "r", "t_input", DTYPE, INPUT_STORAGE, is_scalar_array=False)}

// Sizes of the convolution output image
${layout_declare_ubo(B, "ivec4", "output_sizes")}
// Sizes of the convolution input image
${layout_declare_ubo(B, "ivec4", "input_sizes")}
// Sizes of the im2col matrix of the convolution output
${layout_declare_ubo(B, "ivec4", "matrix_sizes")}

${layout_declare_ubo(B, "Conv2DParams", "conv2d_params")}

// Workgroup dimensions are supplied through specialization constants 0, 1, 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

#include "conv2d_fp_im2col_block_store.glslh"

#ifdef INPUT_BUFFER

// Buffer-backed input: reads TILE_M consecutive rows of the matrix, one texel
// per row, into `tile`.
//   n4      - texel (group-of-four) column index within a row
//   m_start - first matrix row covered by the tile
//   N4      - row pitch of the buffer, in texels
// NOTE(review): rows m_start .. m_start + TILE_M - 1 are read without a bounds
// check here; this presumably relies on the matrix height being padded to a
// multiple of TILE_M - confirm.
void load_matrix_tile(
    out FPOutTile tile,
    const int n4,
    const int m_start,
    const int N4) {
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    const int src_row = m_start + row;
    tile.data[row][0] = t_input[src_row * N4 + n4];
  }
}

#else // INPUT_TEXTURE

// Texture-backed input: reads TILE_M consecutive rows of the matrix, one texel
// per row, into `tile`. The texel at column n4 of row r is fetched from
// position (n4, r, 0) at mip level 0.
//   n4      - texel (group-of-four) column index within a row
//   m_start - first matrix row covered by the tile
//   N4      - unused by this variant; kept so both variants share a signature
void load_matrix_tile(
    out FPOutTile tile,
    const int n4,
    const int m_start,
    const int N4) {
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    const ivec3 texel_pos = ivec3(n4, m_start + row, 0);
    tile.data[row][0] = texelFetch(t_input, texel_pos, 0);
  }
}

#endif // INPUT_BUFFER

// Entry point. Every invocation handles one 4 wide x 4 high block of the
// matrix: it loads the block from t_input and writes it out through
// write_im2col_tile_as_image().
void main() {
  // (x, y) of the invocation = (texel column, block row) of the matrix
  const ivec2 block_idx = ivec2(gl_GlobalInvocationID.xy);
  const int n4 = block_idx.x;
  const int m4 = block_idx.y;

  // Element coordinates of the block's first column / first row
  const int n = mul_4(n4);
  const int m = mul_4(m4);

  // Invocations whose block starts outside the matrix have nothing to do
  const bool starts_inside = n < matrix_sizes.x && m < matrix_sizes.y;
  if (!starts_inside) {
    return;
  }

  // Row pitch of the input matrix, in texels
  const int N4 = div_4(matrix_sizes.x);

  FPOutTile tile;
  load_matrix_tile(tile, n4, m, N4);
  write_im2col_tile_as_image(tile, n4, m);
}
19 changes: 19 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/col2im.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Shader variant configuration for the col2im shader template.
col2im:
# Template parameters together with the value used when a variant does not
# override them.
parameter_names_with_default_values:
DTYPE: float
OUTPUT_STORAGE: texture3d
INPUT_STORAGE: buffer
# NOTE(review): presumably every variant below is generated once per listed
# DTYPE value - confirm against the shader codegen.
generate_variant_forall:
DTYPE:
- VALUE: half
- VALUE: float
# col2im_texture3d_buffer uses the defaults above; col2im_texture3d_texture3d
# overrides INPUT_STORAGE.
shader_variants:
- NAME: col2im_texture3d_buffer
- NAME: col2im_texture3d_texture3d
INPUT_STORAGE: texture3d
51 changes: 51 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/conv2d_common.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef CONV2D_COMMON_GLSLH
#define CONV2D_COMMON_GLSLH

#include "common.glslh"

// Convolution parameters, declared as a uniform block (conv2d_params) by the
// shaders that include this header.
// NOTE(review): the field order presumably has to match the host-side struct
// that fills the uniform buffer - confirm before reordering or inserting
// fields.
struct Conv2DParams {
// Convolution window size; .x is used as the window width when a matrix column
// index is split into kernel_x / kernel_y.
ivec2 kernel_size;
// Per-axis (x, y) multiplier applied to the output position when computing the
// input position.
ivec2 stride;
// Per-axis (x, y) offset subtracted when computing the input position.
ivec2 padding;
// Per-axis (x, y) multiplier applied to the kernel position when computing the
// input position.
ivec2 dilation;
int groups;
int out_channels_per_group;
// Number of input channels in one group; used to split a column index into
// (channel within group, kernel position) and to compute the absolute channel.
int in_channels_per_group;
// NOTE(review): the K* fields below are not read by any code visible here.
// Presumably K is the im2col reduction dimension, logical_* the unpadded
// sizes and *4 the sizes in groups of four - confirm.
int logical_K_per_group;
int K_per_group;
int K4_per_group;
int logical_K;
int K;
int K4;
};

#ifdef DEBUG_MODE

// Debug helper: prints every field of `params` with debugPrintfEXT, one field
// (or one x/y pair) per line. Only compiled when DEBUG_MODE is defined.
//
// The dump covers all members of Conv2DParams, including logical_K, K and K4,
// so the printed state always matches the struct definition.
//
// NOTE(review): the doubled backslash in "\\n" is kept exactly as in the
// existing strings; presumably the shader template preprocessor unescapes it -
// confirm.
void printConv2DParams(const Conv2DParams params) {
  debugPrintfEXT("Conv2DParams: \\n");
  debugPrintfEXT(
      " kernel_size: %d, %d\\n", params.kernel_size.x, params.kernel_size.y);
  debugPrintfEXT(" stride: %d, %d\\n", params.stride.x, params.stride.y);
  debugPrintfEXT(" padding: %d, %d\\n", params.padding.x, params.padding.y);
  debugPrintfEXT(" dilation: %d, %d\\n", params.dilation.x, params.dilation.y);
  debugPrintfEXT(" groups: %d\\n", params.groups);
  debugPrintfEXT(
      " out_channels_per_group: %d\\n", params.out_channels_per_group);
  debugPrintfEXT(
      " in_channels_per_group: %d\\n", params.in_channels_per_group);
  debugPrintfEXT(" logical_K_per_group: %d\\n", params.logical_K_per_group);
  debugPrintfEXT(" K_per_group: %d\\n", params.K_per_group);
  debugPrintfEXT(" K4_per_group: %d\\n", params.K4_per_group);
  // Whole-convolution (non per-group) sizes, previously missing from the dump
  debugPrintfEXT(" logical_K: %d\\n", params.logical_K);
  debugPrintfEXT(" K: %d\\n", params.K);
  debugPrintfEXT(" K4: %d\\n", params.K4);
}

#endif // DEBUG_MODE

#endif // CONV2D_COMMON_GLSLH
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef CONV2D_FP_IM2COL_BLOCK
#define CONV2D_FP_IM2COL_BLOCK

/*
* Defines utilities to convert between (col, row) indices of an im2col matrix
* and 4-dimension tensor indices of image tensors.
*
* Requires:
* - output_sizes to be defined in the shader layout, corresponding to the sizes
* of the output image of the convolution op.
* - input_sizes to be defined in the shader layout, corresponding to the sizes
* of the input image of the convolution op.
* - conv2d_params to be defined in the shader layout
*/

#extension GL_EXT_control_flow_attributes : require

#include "common.glslh"
#include "conv2d_common.glslh"

// Position of one element in the im2col matrix.
struct Im2ColMatrixIdx {
// Matrix row; unwrap_m() decomposes it into an output-image position.
int row;
// Matrix column; im2col_tidx_to_output_tidx() uses it as the channel index.
int col;
// Relevant for grouped convolution. This indicates the column index relative
// to the first column in the group.
int col_idx_in_group;
// Index of the convolution group; multiplied by in_channels_per_group to get
// the first input channel of the group.
int group_idx;
};

// Splits a flat matrix row index `m` into an output-image tensor index, with
// W = output_sizes.x and H = output_sizes.y:
//   x (component 0) = m % W
//   y (component 1) = (m / W) % H
//   component 3     = m / (H * W)
// The channel component (2) is zero-initialized; callers are expected to set
// it afterwards.
void unwrap_m(out TensorIndex4D out_tidx_base, const int m) {
  const int x = m % output_sizes.x;
  const int y = (m / output_sizes.x) % output_sizes.y;
  const int outer = m / (output_sizes.y * output_sizes.x);

  out_tidx_base.data = ivec4(x, y, 0, outer);
}

// Converts an im2col matrix position into the tensor index of the
// corresponding element of the convolution output image.
void im2col_tidx_to_output_tidx(
    out TensorIndex4D output_tidx,
    const Im2ColMatrixIdx im2col_tidx) {
  // The row fixes every component except the channel...
  unwrap_m(output_tidx, im2col_tidx.row);

  // ...and the column is the channel.
  const int channel = im2col_tidx.col;
  output_tidx.data[2] = channel;
}

/*
 * Maps a position in the im2col matrix to the 4D tensor index of the input
 * image element it is taken from, with support for grouped convolutions. The
 * mapping is intended to keep all data belonging to one group in a contiguous
 * block.
 *
 * Within a group, the column index is laid out with the channel varying
 * fastest, then the kernel x position, then the kernel y position.
 */
void im2col_idx_to_input_tidx(
    out TensorIndex4D input_tidx,
    const Im2ColMatrixIdx im2col_idx) {
  // Output position (x, y, -, w) addressed by the matrix row
  TensorIndex4D out_pos;
  unwrap_m(out_pos, im2col_idx.row);

  const int ic_per_group = conv2d_params.in_channels_per_group;
  const int col_in_group = im2col_idx.col_idx_in_group;

  // Split the in-group column index into channel and kernel window position
  const int ic_in_group = col_in_group % ic_per_group;
  const ivec2 kernel_pos = ivec2(
      (col_in_group / ic_per_group) % conv2d_params.kernel_size.x,
      col_in_group / (ic_per_group * conv2d_params.kernel_size.x));

  // Absolute input channel: offset of the group plus the channel inside it
  const int ic = im2col_idx.group_idx * ic_per_group + ic_in_group;

  // Input coordinates reached from the output position through the window:
  // out * stride - padding + kernel * dilation, evaluated per axis
  const ivec2 in_xy = out_pos.data.xy * conv2d_params.stride -
      conv2d_params.padding + kernel_pos * conv2d_params.dilation;

  input_tidx.data = ivec4(in_xy, ic, out_pos.data.w);
}

// 4x4 block of the im2col matrix, stored as four VEC4_T texels.
// NOTE(review): presumably one texel per matrix row - confirm against the
// load/store helpers that fill this struct.
struct FPIm2ColBlock {
VEC4_T data[4];
};

#endif // CONV2D_FP_IM2COL_BLOCK
Loading
Loading