Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,9 @@ jobs:
CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
.ci/scripts/setup-linux.sh --build-tool "cmake"

# Custom operator tests
PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
./cmake-out/backends/vulkan/test/custom_ops/quantized_linear

nxp-build-test:
name: nxp-build-test
Expand Down
40 changes: 40 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/common.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef COMMON_GLSLH
#define COMMON_GLSLH

// Integer helpers for working in groups of 4 elements. Every macro argument is
// parenthesized so that compound expressions (e.g. `a | b`, `c ? x : y`) expand
// with the intended precedence.

// Rounds x up to the next multiple of 4; `& -4` clears the two lowest bits.
#define align_up_4(x) (((x) + 3) & -4)

// Number of groups of 4 needed to hold x elements (division by 4, rounded up).
#define div_up_4(x) (((x) + 3) >> 2)

// Multiply / divide by 4 using shifts.
#define mul_4(x) ((x) << 2)
#define div_4(x) ((x) >> 2)

// Keeps the two lowest bits of x, i.e. x modulo 4 for non-negative x.
#define mod_4(x) ((x) & 3)

// Thin wrapper around a 4-component integer vector.
// NOTE(review): presumably a 4D tensor index (per the name); which tensor
// dimension each component maps to is not defined in this header - confirm
// against the shaders that use it.
struct TensorIndex4D {
ivec4 data;
};

#ifdef DEBUG_MODE

#extension GL_EXT_debug_printf : require

// Debug helper: prints the x, y, z and w components of `index` on one line via
// debugPrintfEXT.
void printTensorIndex4D(const TensorIndex4D index) {
  const ivec4 idx = index.data;
  debugPrintfEXT("tensor_idx: %d, %d, %d, %d\\n", idx.x, idx.y, idx.z, idx.w);
}

#endif // DEBUG_MODE

#endif // COMMON_GLSLH
30 changes: 30 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_bias_load.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef LINEAR_BIAS_LOAD_GLSLH
#define LINEAR_BIAS_LOAD_GLSLH

#include "linear_common.glslh"

// Reads the element at index n4 of the t_bias resource and returns it as a
// VEC4_T. t_bias must be declared by the including shader.
VEC4_T load_bias_x4(const uint n4) {
  const VEC4_T bias_texel = t_bias[n4];
  return bias_texel;
}

// Fills `bias` with TILE_N4 consecutive entries of t_bias, starting at index
// n4_start. No bounds checking is done on the indices that are read.
//
// Parameters:
//   bias      (out) receives the loaded values in bias.data[0 .. TILE_N4 - 1]
//   n4_start  index of the first entry to load
void load_bias_tile(out FPPerOutChannelParams bias, const uint n4_start) {
#if TILE_N4 == 1
  // Single-entry tile: load directly without a loop.
  bias.data[0] = load_bias_x4(n4_start);

#else
  [[unroll]] for (int n4 = 0; n4 < TILE_N4; ++n4) {
    bias.data[n4] = load_bias_x4(n4_start + n4);
  }

#endif
}

#endif // LINEAR_BIAS_LOAD_GLSLH
41 changes: 41 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
* Defines common functions and structs to be used across matrix multiplication
* operators.
*/

#ifndef LINEAR_COMMON_GLSLH
#define LINEAR_COMMON_GLSLH

#include "common.glslh"

// Represents floating point parameter tensors where each element is associated
// with an output channel, such as weight scales, biases, etc.
// Stores TILE_N4 values of type VEC4_T. Neither TILE_N4 nor VEC4_T is defined
// in this header, so both are expected to be provided by the including shader.
struct FPPerOutChannelParams {
VEC4_T data[TILE_N4];
};

#ifdef DEBUG_MODE

// Debug helper: prints a header line, then one line per entry of `params`
// listing that entry's x, y, z and w components.
void printFPPerOutChannelParams(const FPPerOutChannelParams params) {
  debugPrintfEXT("per_out_channel_params: \\n");
  [[unroll]] for (int i = 0; i < TILE_N4; ++i) {
    const VEC4_T v = params.data[i];
    debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
  }
}

#endif // DEBUG_MODE

#endif // LINEAR_COMMON_GLSLH
43 changes: 43 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_fp_input_tile.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef LINEAR_FP_INPUT_TILE_GLSLH
#define LINEAR_FP_INPUT_TILE_GLSLH

/*
* Defines the FPInputTile struct, which is used to represent a tile of the
* input matrix of a matrix multiplication operation.
*
* Settings:
* - TILE_M: number of rows in the tile
* - TILE_K4: number of (groups of 4) columns in the tile
*/

// data[m][k4] is the VEC4_T at tile row m and column group k4 (each group
// covers 4 columns). TILE_M, TILE_K4 and VEC4_T are not defined in this header
// and are expected to be provided by the including shader.
struct FPInputTile {
VEC4_T data[TILE_M][TILE_K4];
};

#ifdef DEBUG_MODE
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not defined otherwise?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The usage of this is to be able to

#define DEBUG_MODE

in order to access debugging functions in the shader template.


// Debug helper: prints a header line, then one line per VEC4_T of `in_tile`
// (rows in the outer loop, column groups in the inner loop) with its x, y, z
// and w components.
void printFPInputTile(const FPInputTile in_tile) {
  debugPrintfEXT("input_tile: \\n");
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col4 = 0; col4 < TILE_K4; ++col4) {
      const VEC4_T v = in_tile.data[row][col4];
      debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
    }
  }
}

#endif // DEBUG_MODE

#endif // LINEAR_FP_INPUT_TILE_GLSLH
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
* Defines functions to load a FPInputTile from input buffer/texture.
*
* Requires:
* - t_input to be declared in the shader layout (input buffer/texture)
*
* Settings:
* - INPUT_BUFFER to indicate input resource is a buffer, otherwise texture is
* assumed.
*/

#ifndef LINEAR_FP_INPUT_TILE_LOAD_GLSLH
#define LINEAR_FP_INPUT_TILE_LOAD_GLSLH

#extension GL_EXT_control_flow_attributes : require

#include "linear_fp_input_tile.glslh"

#ifdef INPUT_BUFFER
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: why is this not inside load_input_x4?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's a great point. I will include this + other ifdef cleanups in a follow up diff.


// Buffer variant (INPUT_BUFFER defined): reads element (m * ntexels_k) + k4 of
// t_input, i.e. column group k4 of row m when each row holds ntexels_k entries.
VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) {
  const uint row_offset = m * ntexels_k;
  return t_input[row_offset + k4];
}

#else

// Texture variant (INPUT_BUFFER not defined): fetches the texel of t_input at
// position (k4, m, 0), mip level 0. ntexels_k is not used by this variant.
VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) {
  const ivec3 pos = ivec3(k4, m, 0);
  return texelFetch(t_input, pos, 0);
}

#endif // INPUT_BUFFER

// To be used only if (M - m_start >= TILE_M) && (K4 - k4_start >= TILE_K4),
// i.e. when the whole tile lies inside the input in both dimensions.
// Loads a TILE_M x TILE_K4 tile of the input into in_tile without any bounds
// checks. Row m of the tile comes from input row (m_start + m); column group k4
// comes from input column group (k4_start + k4).
//
// Parameters:
//   in_tile   (out) receives the loaded tile
//   k4_start  first column group to load
//   m_start   first row to load
//   K4        forwarded to load_input_x4 as the per-row texel count
//   M         not used by this function
void load_input_tile_no_checks(
    out FPInputTile in_tile,
    const uint k4_start,
    const uint m_start,
    const uint K4,
    const uint M) {
  // NOTE(review): the TILE_K4 == 1 specialization below was questioned in code
  // review; the author plans to simplify these #if branches in a follow-up.
#if TILE_K4 == 1
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    in_tile.data[row][0] = load_input_x4(k4_start, m_start + row, K4);
  }

#else
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col4 = 0; col4 < TILE_K4; ++col4) {
      in_tile.data[row][col4] =
          load_input_x4(k4_start + col4, m_start + row, K4);
    }
  }
#endif
}

// To be used if near tensor boundaries
// Loads a TILE_M x TILE_K4 tile of the input into in_tile. Any tile element
// whose row (m_start + m) is >= M or whose column group (k4_start + k4) is
// >= K4 is set to zero instead of being read from the input.
//
// Parameters:
//   in_tile   (out) receives the loaded (and zero padded) tile
//   k4_start  first column group to load
//   m_start   first row to load
//   K4        number of column groups in the input; also forwarded to
//             load_input_x4 as the per-row texel count
//   M         number of rows in the input
void load_input_tile_with_checks(
    out FPInputTile in_tile,
    const uint k4_start,
    const uint m_start,
    const uint K4,
    const uint M) {
#if TILE_K4 == 1
  // Apply the same row and column bounds as the general path below so that
  // both specializations zero-fill identically at the tensor boundary.
  [[unroll]] for (int m = 0; m < TILE_M; ++m) {
    if (m_start + m < M && k4_start < K4) {
      in_tile.data[m][0] = load_input_x4(k4_start, m_start + m, K4);
    } else {
      in_tile.data[m][0] = VEC4_T(0.0);
    }
  }

#else
  [[unroll]] for (int m = 0; m < TILE_M; ++m) {
    [[unroll]] for (int k4 = 0; k4 < TILE_K4; ++k4) {
      if (m_start + m < M && k4_start + k4 < K4) {
        in_tile.data[m][k4] = load_input_x4(k4_start + k4, m_start + m, K4);
      } else {
        in_tile.data[m][k4] = VEC4_T(0.0);
      }
    }
  }
#endif
}

#endif // LINEAR_FP_INPUT_TILE_LOAD_GLSLH
60 changes: 60 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_fp_output_tile.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
* Defines the FPOutTile struct, which is used to represent a tile of the output
* matrix of a matrix multiplication operation.
*
* Settings:
* - TILE_M: number of rows in the output tile
* - TILE_N4: number of (groups of 4) columns in the output tile
*/

#ifndef LINEAR_FP_OUTPUT_TILE_GLSLH
#define LINEAR_FP_OUTPUT_TILE_GLSLH

#extension GL_EXT_control_flow_attributes : require

// data[m][n4] is the VEC4_T at output tile row m and column group n4 (each
// group covers 4 columns). TILE_M, TILE_N4 and VEC4_T are not defined in this
// header and are expected to be provided by the including shader.
struct FPOutTile {
VEC4_T data[TILE_M][TILE_N4];
};

// Sets every VEC4_T element of out_tile to zero.
//
// Parameters:
//   out_tile  (out) tile whose TILE_M x TILE_N4 elements are zeroed
void initialize(out FPOutTile out_tile) {
#if TILE_M > 1 && TILE_N4 == 1
  // Single column group: only a loop over the rows is needed.
  [[unroll]] for (int y = 0; y < TILE_M; ++y) {
    out_tile.data[y][0] = VEC4_T(0);
  }

#else
  [[unroll]] for (int y = 0; y < TILE_M; ++y) {
    // The second dimension of FPOutTile.data is TILE_N4, so that is the bound
    // the column loop must use.
    [[unroll]] for (int x4 = 0; x4 < TILE_N4; ++x4) {
      out_tile.data[y][x4] = VEC4_T(0);
    }
  }
#endif
}

#ifdef DEBUG_MODE

// Debug helper: prints a header line, then one line per VEC4_T of `tile` (rows
// in the outer loop, column groups in the inner loop) with its x, y, z and w
// components.
void printFPOutputTile(const FPOutTile tile) {
  debugPrintfEXT("output_tile: \\n");
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col4 = 0; col4 < TILE_N4; ++col4) {
      const VEC4_T v = tile.data[row][col4];
      debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
    }
  }
}

#endif // DEBUG_MODE

#endif // LINEAR_FP_OUTPUT_TILE_GLSLH
Loading
Loading