Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,9 @@ jobs:
CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
.ci/scripts/setup-linux.sh --build-tool "cmake"

# Custom operator tests
PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear

nxp-build-test:
name: nxp-build-test
Expand Down
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,10 @@ class ComputeGraph final {
return idx == kDummyValueRef ? true : values_.at(idx).isNone();
}

// Convenience inverse of val_is_none(): returns true exactly when
// val_is_none(idx) returns false.
inline bool val_is_not_none(const ValueRef idx) {
return !val_is_none(idx);
}

// Returns the TypeTag reported by the entry of values_ at position idx.
// NOTE(review): out-of-range behavior depends on what values_.at() does for
// this container type, which is not visible here.
inline TypeTag get_val_type(const ValueRef idx) {
return values_.at(idx).type();
}
Expand Down
51 changes: 51 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/common.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef COMMON_GLSLH
#define COMMON_GLSLH

// Shift/mask helpers for arithmetic with the constants 2, 4 and 8.
//
// Every macro parenthesizes its argument so that compound expressions (for
// example `a | b` or `c ? a : b`) can be passed without precedence surprises.
// The helpers are written in terms of shifts and bit masks, so for negative
// operands they follow shift/mask semantics rather than those of `/` and `%`.

// x * {2, 4, 8}
#define mul_2(x) ((x) << 1)
#define mul_4(x) ((x) << 2)
#define mul_8(x) ((x) << 3)

// x / {2, 4, 8}, rounding down
#define div_2(x) ((x) >> 1)
#define div_4(x) ((x) >> 2)
#define div_8(x) ((x) >> 3)

// x / {2, 4, 8}, rounding up
#define div_up_2(x) (((x) + 1) >> 1)
#define div_up_4(x) (((x) + 3) >> 2)
#define div_up_8(x) (((x) + 7) >> 3)

// Round x up to the next multiple of {2, 4, 8}. `-N` is used as the mask that
// clears the low bits after the bias has been added.
#define align_up_2(x) (((x) + 1) & -2)
#define align_up_4(x) (((x) + 3) & -4)
#define align_up_8(x) (((x) + 7) & -8)

// x % {2, 4, 8}
#define mod_2(x) ((x) & 1)
#define mod_4(x) ((x) & 3)
#define mod_8(x) ((x) & 7)

// Thin wrapper around an ivec4 that holds the four components of a tensor
// index.
// NOTE(review): which tensor dimension each component (x, y, z, w) refers to
// is not established in this file — confirm against the code that fills it.
struct TensorIndex4D {
ivec4 data;
};

#ifdef DEBUG_MODE

#extension GL_EXT_debug_printf : require

// Debug helper, only compiled when DEBUG_MODE is defined: prints the x, y, z
// and w components of `index.data` on one line through debugPrintfEXT.
// NOTE(review): the newline is written as an escaped backslash followed by
// `n`; presumably the shader source is unescaped by a preprocessing step
// before compilation — confirm.
void printTensorIndex4D(const TensorIndex4D index) {
debugPrintfEXT(
"tensor_idx: %d, %d, %d, %d\\n",
index.data.x,
index.data.y,
index.data.z,
index.data.w);
}

#endif // DEBUG_MODE

#endif // COMMON_GLSLH
32 changes: 32 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
* Defines common functions and structs to be used across matrix multiplication
* operators.
*/

#ifndef LINEAR_COMMON_GLSLH
#define LINEAR_COMMON_GLSLH

#include "common.glslh"

/*
 * Sign-extends an 8-bit value held in the low byte of `val`.
 *
 * If bit 7 of `val` is set, all bits above the low byte are set as well, which
 * makes the result the negative number the byte encodes in two's complement.
 * Otherwise `val` is returned unchanged. Bits above the low byte are not
 * cleared, so callers should pass a value already masked to 8 bits.
 */
int sign_extend_8bit(const int val) {
  const int sign_bit = val & 0x80;
  const int upper_bits = ~0xFF;
  return (sign_bit != 0) ? (val | upper_bits) : val;
}

/*
 * Extracts byte `i` from `packed` and returns it as a sign-extended int.
 *
 * Bytes are numbered in little-endian order: i = 0 selects the least
 * significant 8 bits, i = 1 the next 8 bits, and so on.
 */
int extract_8bit_from_packed_int_le(const int packed, const int i) {
  const int bit_offset = 8 * i;
  // Mask after shifting so that only the selected byte reaches the helper.
  const int raw_byte = (packed >> bit_offset) & 0xFF;
  return sign_extend_8bit(raw_byte);
}

#endif // LINEAR_COMMON_GLSLH
30 changes: 30 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_fp_bias_load.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef LINEAR_FP_BIAS_LOAD_GLSLH
#define LINEAR_FP_BIAS_LOAD_GLSLH

#include "linear_fp_per_out_channel_params.glslh"

// Returns element `n4` of t_bias. No bounds check is performed.
// NOTE(review): t_bias is not declared in this header; presumably the
// including shader declares it as a VEC4_T buffer — confirm.
VEC4_T load_bias_x4(const int n4) {
return t_bias[n4];
}

/*
 * Fills `bias.data[0 .. TILE_N4 - 1]` with consecutive entries read through
 * load_bias_x4(), starting at index `n4_start`. No bounds checks are made.
 */
void load_bias_tile(out FPPerOutChannelParams bias, const int n4_start) {
#if TILE_N4 != 1
  [[unroll]] for (int i = 0; i < TILE_N4; ++i) {
    bias.data[i] = load_bias_x4(n4_start + i);
  }

#else
  // Single-entry tile: one direct load, no loop.
  bias.data[0] = load_bias_x4(n4_start);

#endif
}

#endif // LINEAR_FP_BIAS_LOAD_GLSLH
45 changes: 45 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_fp_input_tile.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#ifndef LINEAR_FP_INPUT_TILE_GLSLH
#define LINEAR_FP_INPUT_TILE_GLSLH

/*
* Defines the FPInputTile struct, which is used to represent a tile of the
* input matrix of a matrix multiplication operation.
*
* Settings:
* - TILE_M: number of rows in the tile
* - TILE_K4: number of (groups of 4) columns in the tile
*/

#extension GL_EXT_control_flow_attributes : require

// Tile of input values: data[m][k4] is the VEC4_T at tile row m (0..TILE_M-1)
// and 4-wide column group k4 (0..TILE_K4-1).
struct FPInputTile {
VEC4_T data[TILE_M][TILE_K4];
};

#ifdef DEBUG_MODE
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not defined otherwise?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The usage of this is to be able to

#define DEBUG_MODE

in order to access debugging functions in the shader template.


// Debug helper, only compiled when DEBUG_MODE is defined: prints a header line
// and then the four components of every VEC4_T in `in_tile`, one vec4 per
// debugPrintfEXT call, iterating rows (m) in the outer loop and column groups
// (k4) in the inner loop.
void printFPInputTile(const FPInputTile in_tile) {
debugPrintfEXT("input_tile: \\n");
[[unroll]] for (int m = 0; m < TILE_M; ++m) {
[[unroll]] for (int k4 = 0; k4 < TILE_K4; ++k4) {
debugPrintfEXT(
" %f, %f, %f, %f, \\n",
in_tile.data[m][k4].x,
in_tile.data[m][k4].y,
in_tile.data[m][k4].z,
in_tile.data[m][k4].w);
}
}
}

#endif // DEBUG_MODE

#endif // LINEAR_FP_INPUT_TILE_GLSLH
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
* Defines functions to load a FPInputTile from input buffer/texture.
*
* Requires:
* - t_input to be declared in the shader layout (input buffer/texture)
*
* Settings:
* - INPUT_BUFFER to indicate input resource is a buffer, otherwise texture is
* assumed.
*/

#ifndef LINEAR_FP_INPUT_TILE_LOAD_GLSLH
#define LINEAR_FP_INPUT_TILE_LOAD_GLSLH

#extension GL_EXT_control_flow_attributes : require

#include "linear_fp_input_tile.glslh"

#ifdef INPUT_BUFFER
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: why is this not inside load_input_x4?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's a great point. I will include this + other ifdef cleanups in a follow up diff.


// Buffer variant (INPUT_BUFFER defined): reads one VEC4_T from t_input, which
// is indexed as a flat array in which each row `m` spans `ntexels_k` entries
// and `k4` selects the entry within the row. No bounds check is performed.
VEC4_T load_input_x4(const int k4, const int m, const int ntexels_k) {
  const int row_offset = m * ntexels_k;
  return t_input[row_offset + k4];
}

#else

// Texture variant (INPUT_BUFFER not defined): fetches the texel at
// (x = k4, y = m, z = 0), mip level 0, from t_input. `ntexels_k` is unused
// here; it is kept so both variants share one signature.
VEC4_T load_input_x4(const int k4, const int m, const int ntexels_k) {
  const ivec3 texel_pos = ivec3(k4, m, 0);
  return texelFetch(t_input, texel_pos, 0);
}

#endif // INPUT_BUFFER

// To be used if (M - m_start >= TILE_M) || (K4 - k4_start >= TILE_K4)
// Loads a TILE_M x TILE_K4 tile into `in_tile`, reading element
// (m_start + m, k4_start + k4) through load_input_x4() for every tile
// position. K4 is forwarded to load_input_x4() as the row width; M is not used
// by this function.
// NOTE(review): no bounds are checked in either dimension, so presumably the
// whole tile has to be in range, i.e. both conditions in the comment above
// should hold (`&&` rather than `||`) — confirm.
void load_input_tile_no_checks(
out FPInputTile in_tile,
const int k4_start,
const int m_start,
const int K4,
const int M) {
#if TILE_K4 == 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need this specialization? Does compiler not do this for you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I plan to simplify these ifdefs in my most recent diff.

[[unroll]] for (int m = 0; m < TILE_M; ++m) {
in_tile.data[m][0] = load_input_x4(k4_start, m_start + m, K4);
}

#else
[[unroll]] for (int m = 0; m < TILE_M; ++m) {
[[unroll]] for (int k4 = 0; k4 < TILE_K4; ++k4) {
in_tile.data[m][k4] = load_input_x4(k4_start + k4, m_start + m, K4);
}
}
#endif
}

/*
 * Loads a TILE_M x TILE_K4 tile into `in_tile` for tiles that may extend past
 * the tensor boundaries.
 *
 * Tile element (m, k4) is read through load_input_x4() from position
 * (m_start + m, k4_start + k4) when that position satisfies
 * `m_start + m < M` and `k4_start + k4 < K4`; otherwise it is set to zero.
 * K4 is also forwarded to load_input_x4() as the row width.
 *
 * Both code paths apply the same two bounds. The TILE_K4 == 1 path used to
 * check only the row bound, which allowed an out-of-range read when
 * k4_start >= K4.
 */
void load_input_tile_with_checks(
    out FPInputTile in_tile,
    const int k4_start,
    const int m_start,
    const int K4,
    const int M) {
#if TILE_K4 == 1
  // The column test does not depend on m, so evaluate it once.
  const bool k4_in_range = k4_start < K4;
  [[unroll]] for (int m = 0; m < TILE_M; ++m) {
    if (k4_in_range && m_start + m < M) {
      in_tile.data[m][0] = load_input_x4(k4_start, m_start + m, K4);
    } else {
      in_tile.data[m][0] = VEC4_T(0.0);
    }
  }

#else
  [[unroll]] for (int m = 0; m < TILE_M; ++m) {
    [[unroll]] for (int k4 = 0; k4 < TILE_K4; ++k4) {
      if (m_start + m < M && k4_start + k4 < K4) {
        in_tile.data[m][k4] = load_input_x4(k4_start + k4, m_start + m, K4);
      } else {
        in_tile.data[m][k4] = VEC4_T(0.0);
      }
    }
  }
#endif
}

#endif // LINEAR_FP_INPUT_TILE_LOAD_GLSLH
61 changes: 61 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/linear_fp_output_tile.glslh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

/*
* Defines the FPOutTile struct, which is used to represent a tile of the output
* matrix of a matrix multiplication operation.
*
* Settings:
* - TILE_M: number of rows in the output tile
* - TILE_N4: number of (groups of 4) columns in the output tile
*/

#ifndef LINEAR_FP_OUTPUT_TILE_GLSLH
#define LINEAR_FP_OUTPUT_TILE_GLSLH

#extension GL_EXT_control_flow_attributes : require

// Tile of output values: data[m][n4] is the VEC4_T at tile row m
// (0..TILE_M-1) and 4-wide column group n4 (0..TILE_N4-1).
struct FPOutTile {
VEC4_T data[TILE_M][TILE_N4];
};

/*
 * Sets every VEC4_T in `out_tile` to VEC4_T(0).
 */
void initialize(out FPOutTile out_tile) {
#if TILE_N4 != 1
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col4 = 0; col4 < TILE_N4; ++col4) {
      out_tile.data[row][col4] = VEC4_T(0);
    }
  }

#else
  // Single column group: only the row loop is needed.
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    out_tile.data[row][0] = VEC4_T(0);
  }
#endif
}

#ifdef DEBUG_MODE

// Debug helper, only compiled when DEBUG_MODE is defined: prints a header line
// and then the contents of `tile`. All column groups (n4) of a row are printed
// back to back, and a newline is emitted after each row (m).
void printFPOutTile(const FPOutTile tile) {
debugPrintfEXT("output_tile: \\n");
[[unroll]] for (int m = 0; m < TILE_M; ++m) {
[[unroll]] for (int n4 = 0; n4 < TILE_N4; ++n4) {
debugPrintfEXT(
" %f, %f, %f, %f,",
tile.data[m][n4].x,
tile.data[m][n4].y,
tile.data[m][n4].z,
tile.data[m][n4].w);
}
debugPrintfEXT("\\n");
}
}

#endif // DEBUG_MODE

#endif // LINEAR_FP_OUTPUT_TILE_GLSLH
Loading
Loading