-
Notifications
You must be signed in to change notification settings - Fork 698
[ET-VK] Quantized Int8 Linear #13816
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
a5d8a02
782d62c
c4d272f
29d1118
005ffd3
92fabb5
e492d57
e2c6cd4
be47894
a29d61c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| #ifndef COMMON_GLSLH | ||
| #define COMMON_GLSLH | ||
|
|
||
| // Rounds x up to the nearest multiple of 4 by adding 3 and clearing the two | ||
| // low bits. The argument is fully parenthesized so that compound expressions | ||
| // passed as x expand with the intended precedence. | ||
| #define align_up_4(x) (((x) + 3) & -4) | ||
|
|
||
| // Number of groups of 4 needed to hold x elements (x / 4 rounded up), for | ||
| // non-negative x. | ||
| #define div_up_4(x) (((x) + 3) >> 2) | ||
|
|
||
| // Multiplication and division by 4, expressed as shifts by 2 bits. | ||
| #define mul_4(x) ((x) << 2) | ||
| #define div_4(x) ((x) >> 2) | ||
|
|
||
| // Low two bits of x, i.e. x modulo 4 for non-negative x. | ||
| #define mod_4(x) ((x) & 3) | ||
|
|
||
| // Wrapper around an ivec4 holding the four components of a tensor index. | ||
| // How the x/y/z/w components map to tensor dimensions is not defined here. | ||
| struct TensorIndex4D { | ||
| ivec4 data; | ||
| }; | ||
|
|
||
| #ifdef DEBUG_MODE | ||
|
|
||
| #extension GL_EXT_debug_printf : require | ||
|
|
||
| // Debug helper: prints the four components of `index` in x, y, z, w order | ||
| // through debugPrintfEXT. | ||
| void printTensorIndex4D(const TensorIndex4D index) { | ||
|   const ivec4 idx = index.data; | ||
|   debugPrintfEXT("tensor_idx: %d, %d, %d, %d\\n", idx.x, idx.y, idx.z, idx.w); | ||
| } | ||
|
|
||
| #endif // DEBUG_MODE | ||
|
|
||
| #endif // COMMON_GLSLH |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| #ifndef LINEAR_BIAS_LOAD_GLSLH | ||
| #define LINEAR_BIAS_LOAD_GLSLH | ||
|
|
||
| #include "linear_common.glslh" | ||
|
|
||
| // Returns element n4 of t_bias. No bounds check is performed. | ||
| VEC4_T load_bias_x4(const uint n4) { | ||
|   const VEC4_T bias_vec = t_bias[n4]; | ||
|   return bias_vec; | ||
| } | ||
|
|
||
| // Fills bias.data with TILE_N4 consecutive bias vectors read through | ||
| // load_bias_x4, starting at index n4_start. No bounds checks are performed. | ||
| void load_bias_tile(out FPPerOutChannelParams bias, const uint n4_start) { | ||
| #if TILE_N4 == 1 | ||
|   bias.data[0] = load_bias_x4(n4_start); | ||
|
|
||
| #else | ||
|   [[unroll]] for (int n4 = 0; n4 < TILE_N4; ++n4) { | ||
|     // load_bias_x4 is a function and has to be called with parentheses; | ||
|     // subscripting it with [] is not valid. | ||
|     bias.data[n4] = load_bias_x4(n4_start + n4); | ||
|   } | ||
|
|
||
| #endif | ||
| } | ||
|
|
||
| #endif // LINEAR_BIAS_LOAD_GLSLH |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| /* | ||
| * Defines common functions and structs to be used across matrix multiplication | ||
| * operators. | ||
| */ | ||
|
|
||
| #ifndef LINEAR_COMMON_GLSLH | ||
| #define LINEAR_COMMON_GLSLH | ||
|
|
||
| #include "common.glslh" | ||
|
|
||
| // Represents floating point parameter tensors where each element is associated | ||
| // with an output channel, such as weight scales, biases, etc. | ||
| // Holds TILE_N4 values of type VEC4_T; neither name is defined in this header, | ||
| // so both are expected to be provided by the including shader. | ||
| struct FPPerOutChannelParams { | ||
| VEC4_T data[TILE_N4]; | ||
| }; | ||
|
|
||
| #ifdef DEBUG_MODE | ||
|
|
||
| // Debug helper: prints a header line, then one line with the four components | ||
| // of each of the TILE_N4 entries in params.data. | ||
| void printFPPerOutChannelParams(const FPPerOutChannelParams params) { | ||
|   debugPrintfEXT("per_out_channel_params: \\n"); | ||
|   [[unroll]] for (int i = 0; i < TILE_N4; ++i) { | ||
|     const VEC4_T v = params.data[i]; | ||
|     debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w); | ||
|   } | ||
| } | ||
|
|
||
| #endif // DEBUG_MODE | ||
|
|
||
| #endif // LINEAR_COMMON_GLSLH |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| #ifndef LINEAR_FP_INPUT_TILE_GLSLH | ||
| #define LINEAR_FP_INPUT_TILE_GLSLH | ||
|
|
||
| /* | ||
| * Defines the FPInputTile struct, which is used to represent a tile of the | ||
| * input matrix of a matrix multiplication operation. | ||
| * | ||
| * Settings: | ||
| * - TILE_M: number of rows in the tile | ||
| * - TILE_K4: number of (groups of 4) columns in the tile | ||
| */ | ||
|
|
||
| // TILE_M x TILE_K4 grid of VEC4_T values, indexed as data[m][k4]. | ||
| struct FPInputTile { | ||
| VEC4_T data[TILE_M][TILE_K4]; | ||
| }; | ||
|
|
||
| #ifdef DEBUG_MODE | ||
|
|
||
| // Debug helper: prints a header line, then the four components of every entry | ||
| // of in_tile.data, one entry per line, rows outermost. | ||
| void printFPInputTile(const FPInputTile in_tile) { | ||
|   debugPrintfEXT("input_tile: \\n"); | ||
|   [[unroll]] for (int row = 0; row < TILE_M; ++row) { | ||
|     [[unroll]] for (int col4 = 0; col4 < TILE_K4; ++col4) { | ||
|       const VEC4_T v = in_tile.data[row][col4]; | ||
|       debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w); | ||
|     } | ||
|   } | ||
| } | ||
|
|
||
| #endif // DEBUG_MODE | ||
|
|
||
| #endif // LINEAR_FP_INPUT_TILE_GLSLH | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| /* | ||
| * Defines functions to load a FPInputTile from input buffer/texture. | ||
| * | ||
| * Requires: | ||
| * - t_input to be declared in the shader layout (input buffer/texture) | ||
| * | ||
| * Settings: | ||
| * - INPUT_BUFFER to indicate input resource is a buffer, otherwise texture is | ||
| * assumed. | ||
| */ | ||
|
|
||
| #ifndef LINEAR_FP_INPUT_TILE_LOAD_GLSLH | ||
| #define LINEAR_FP_INPUT_TILE_LOAD_GLSLH | ||
|
|
||
| #extension GL_EXT_control_flow_attributes : require | ||
|
|
||
| #include "linear_fp_input_tile.glslh" | ||
|
|
||
| #ifdef INPUT_BUFFER | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: why is this not inside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's a great point. I will include this + other ifdef cleanups in a follow up diff. |
||
|
|
||
| // Buffer variant: returns the element of t_input at flat index | ||
| // (m * ntexels_k) + k4. No bounds check is performed. | ||
| VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) { | ||
|   const uint flat_idx = (m * ntexels_k) + k4; | ||
|   return t_input[flat_idx]; | ||
| } | ||
|
|
||
| #else | ||
|
|
||
| // Texture variant: fetches the texel of t_input at position (k4, m, 0) with | ||
| // the last texelFetch argument set to 0. ntexels_k is not used here; | ||
| // presumably it is kept so that both variants share one signature. | ||
| VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) { | ||
|   const ivec3 texel_pos = ivec3(k4, m, 0); | ||
|   return texelFetch(t_input, texel_pos, 0); | ||
| } | ||
|
|
||
| #endif // INPUT_BUFFER | ||
|
|
||
| // Loads a full TILE_M x TILE_K4 tile into in_tile without any bounds checks. | ||
| // To be used only if (M - m_start >= TILE_M) && (K4 - k4_start >= TILE_K4), | ||
| // i.e. when every position of the tile is inside the input. K4 is forwarded to | ||
| // load_input_x4 as its third argument; M is not read by this function. | ||
| void load_input_tile_no_checks( | ||
|     out FPInputTile in_tile, | ||
|     const uint k4_start, | ||
|     const uint m_start, | ||
|     const uint K4, | ||
|     const uint M) { | ||
| #if TILE_K4 == 1 | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need this specialization? Does compiler not do this for you? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I plan to simplify these ifdefs in my most recent diff. |
||
|   [[unroll]] for (int row = 0; row < TILE_M; ++row) { | ||
|     const uint m = m_start + row; | ||
|     in_tile.data[row][0] = load_input_x4(k4_start, m, K4); | ||
|   } | ||
|
|
||
| #else | ||
|   [[unroll]] for (int row = 0; row < TILE_M; ++row) { | ||
|     const uint m = m_start + row; | ||
|     [[unroll]] for (int col = 0; col < TILE_K4; ++col) { | ||
|       const uint k4 = k4_start + col; | ||
|       in_tile.data[row][col] = load_input_x4(k4, m, K4); | ||
|     } | ||
|   } | ||
| #endif | ||
| } | ||
|
|
||
| // Loads a TILE_M x TILE_K4 tile into in_tile for use near tensor boundaries. | ||
| // Positions that fail the bounds test are set to zero instead of being loaded. | ||
| // When TILE_K4 == 1 only the row is tested against M; the general path tests | ||
| // the row against M and the k4 position against K4. | ||
| void load_input_tile_with_checks( | ||
|     out FPInputTile in_tile, | ||
|     const uint k4_start, | ||
|     const uint m_start, | ||
|     const uint K4, | ||
|     const uint M) { | ||
| #if TILE_K4 == 1 | ||
|   [[unroll]] for (int row = 0; row < TILE_M; ++row) { | ||
|     const uint m = m_start + row; | ||
|     if (m >= M) { | ||
|       in_tile.data[row][0] = VEC4_T(0.0); | ||
|     } else { | ||
|       in_tile.data[row][0] = load_input_x4(k4_start, m, K4); | ||
|     } | ||
|   } | ||
|
|
||
| #else | ||
|   [[unroll]] for (int row = 0; row < TILE_M; ++row) { | ||
|     const uint m = m_start + row; | ||
|     [[unroll]] for (int col = 0; col < TILE_K4; ++col) { | ||
|       const uint k4 = k4_start + col; | ||
|       const bool in_bounds = (m < M) && (k4 < K4); | ||
|       if (!in_bounds) { | ||
|         in_tile.data[row][col] = VEC4_T(0.0); | ||
|       } else { | ||
|         in_tile.data[row][col] = load_input_x4(k4, m, K4); | ||
|       } | ||
|     } | ||
|   } | ||
| #endif | ||
| } | ||
|
|
||
| #endif // LINEAR_FP_INPUT_TILE_LOAD_GLSLH | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| /* | ||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||
| * All rights reserved. | ||
| * | ||
| * This source code is licensed under the BSD-style license found in the | ||
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| /* | ||
| * Defines the FPOutTile struct, which is used to represent a tile of the output | ||
| * matrix of a matrix multiplication operation. | ||
| * | ||
| * Settings: | ||
| * - TILE_M: number of rows in the output tile | ||
| * - TILE_N4: number of (groups of 4) columns in the output tile | ||
| */ | ||
|
|
||
| #ifndef LINEAR_FP_OUTPUT_TILE_GLSLH | ||
| #define LINEAR_FP_OUTPUT_TILE_GLSLH | ||
|
|
||
| #extension GL_EXT_control_flow_attributes : require | ||
|
|
||
| // TILE_M x TILE_N4 grid of VEC4_T values, indexed as data[m][n4]. | ||
| struct FPOutTile { | ||
| VEC4_T data[TILE_M][TILE_N4]; | ||
| }; | ||
|
|
||
| // Sets every entry of out_tile.data to VEC4_T(0). | ||
| void initialize(out FPOutTile out_tile) { | ||
| #if TILE_M > 1 && TILE_N4 == 1 | ||
|   [[unroll]] for (int y = 0; y < TILE_M; ++y) { | ||
|     out_tile.data[y][0] = VEC4_T(0); | ||
|   } | ||
|
|
||
| #else | ||
|   [[unroll]] for (int y = 0; y < TILE_M; ++y) { | ||
|     // The second dimension of FPOutTile.data is TILE_N4, so the inner loop is | ||
|     // bounded by TILE_N4 (not TILE_K4) to cover exactly the whole tile. | ||
|     [[unroll]] for (int x4 = 0; x4 < TILE_N4; ++x4) { | ||
|       out_tile.data[y][x4] = VEC4_T(0); | ||
|     } | ||
|   } | ||
| #endif | ||
| } | ||
|
|
||
| #ifdef DEBUG_MODE | ||
|
|
||
| // Debug helper: prints a header line, then the four components of every entry | ||
| // of tile.data, one entry per line, rows outermost. | ||
| void printFPOutputTile(const FPOutTile tile) { | ||
|   debugPrintfEXT("output_tile: \\n"); | ||
|   [[unroll]] for (int row = 0; row < TILE_M; ++row) { | ||
|     [[unroll]] for (int col4 = 0; col4 < TILE_N4; ++col4) { | ||
|       const VEC4_T v = tile.data[row][col4]; | ||
|       debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w); | ||
|     } | ||
|   } | ||
| } | ||
|
|
||
| #endif // DEBUG_MODE | ||
|
|
||
| #endif // LINEAR_FP_OUTPUT_TILE_GLSLH |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not defined otherwise?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The usage of this is to be able to `#define DEBUG_MODE`
in order to access debugging functions in the shader template.