Skip to content

Commit a5d8a02

Browse files
author
ssjia
committed
[ET-VK] Quantized Int8 Linear
Title says it all! This PR adds implementations for int8 linear layers. Convolution is implemented in a later step, computing convolution as matrix multiplication via the im2col procedure. For both linear and convolution, two versions are implemented: 1. the `q8ta_q8csw` variant, which quantizes the input tensor and then performs integer accumulation via the int8 dot product extension; 2. the `q8csw` variant, which dequantizes the weight tensor in-shader and performs floating point accumulation. The second one is needed to provide an alternative path for executing quantized models if the target GPU does not support the int8 dot product extension. These new ops are tested via the custom op testing + benchmarking framework introduced in the previous diff. Differential Revision: [D81323424](https://our.internmc.facebook.com/intern/diff/D81323424/) [ghstack-poisoned]
1 parent d203d70 commit a5d8a02

32 files changed

+2716
-0
lines changed

.github/workflows/pull.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,9 @@ jobs:
928928
CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
929929
.ci/scripts/setup-linux.sh --build-tool "cmake"
930930
931+
# Custom operator tests
931932
PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
933+
./cmake-out/backends/vulkan/test/custom_ops/quantized_linear
932934
933935
nxp-build-test:
934936
name: nxp-build-test
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef COMMON_GLSLH
10+
#define COMMON_GLSLH
11+
12+
// Rounds x up to a multiple of 4 by adding 3 and clearing the low two bits.
// The argument is parenthesized so that expressions built from operators with
// lower precedence than `+` (e.g. `a | b`, `c ? d : e`) expand correctly.
#define align_up_4(x) (((x) + 3) & -4)

// Computes (x + 3) >> 2, i.e. x / 4 rounded up for non-negative x.
#define div_up_4(x) (((x) + 3) >> 2)

// Multiplication and division by 4 expressed as shifts.
#define mul_4(x) ((x) << 2)
#define div_4(x) ((x) >> 2)

// Keeps the low two bits of x (x modulo 4 for non-negative x).
#define mod_4(x) ((x) & 3)
20+
21+
// Wraps a four-component integer tensor index in a single ivec4.
struct TensorIndex4D {
  ivec4 data;
};
24+
25+
#ifdef DEBUG_MODE
26+
27+
#extension GL_EXT_debug_printf : require
28+
29+
// Debug helper: prints the x, y, z and w components of `index` through
// debugPrintfEXT.
void printTensorIndex4D(const TensorIndex4D index) {
  const ivec4 idx = index.data;
  debugPrintfEXT("tensor_idx: %d, %d, %d, %d\\n", idx.x, idx.y, idx.z, idx.w);
}
37+
38+
#endif // DEBUG_MODE
39+
40+
#endif // COMMON_GLSLH
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef LINEAR_BIAS_LOAD_GLSLH
10+
#define LINEAR_BIAS_LOAD_GLSLH
11+
12+
#include "linear_common.glslh"
13+
14+
// Returns entry `n4` of t_bias. No bounds checking is performed.
VEC4_T load_bias_x4(const uint n4) {
  const VEC4_T bias_texel = t_bias[n4];
  return bias_texel;
}
17+
18+
// Fills `bias` with TILE_N4 consecutive entries of t_bias, starting at entry
// `n4_start`. No bounds checking is performed on the bias reads.
void load_bias_tile(out FPPerOutChannelParams bias, const uint n4_start) {
#if TILE_N4 == 1
  bias.data[0] = load_bias_x4(n4_start);

#else
  [[unroll]] for (int n4 = 0; n4 < TILE_N4; ++n4) {
    // load_bias_x4 is a function: it must be called with (), not indexed
    // with [].
    bias.data[n4] = load_bias_x4(n4_start + n4);
  }

#endif
}
29+
30+
#endif // LINEAR_BIAS_LOAD_GLSLH
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
/*
10+
* Defines common functions and structs to be used across matrix multiplication
11+
* operators.
12+
*/
13+
14+
#ifndef LINEAR_COMMON_GLSLH
15+
#define LINEAR_COMMON_GLSLH
16+
17+
#include "common.glslh"
18+
19+
// Floating point parameters that are indexed per output channel (weight
// scales, biases, etc.). Holds TILE_N4 VEC4_T entries.
struct FPPerOutChannelParams {
  VEC4_T data[TILE_N4];
};
24+
25+
#ifdef DEBUG_MODE
26+
27+
// Debug helper: prints a header line followed by one line per VEC4_T entry of
// `params`, using debugPrintfEXT.
void printFPPerOutChannelParams(const FPPerOutChannelParams params) {
  debugPrintfEXT("per_out_channel_params: \\n");
  [[unroll]] for (int i = 0; i < TILE_N4; ++i) {
    const VEC4_T v = params.data[i];
    debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
  }
}
38+
39+
#endif // DEBUG_MODE
40+
41+
#endif // LINEAR_COMMON_GLSLH
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef LINEAR_FP_INPUT_TILE_GLSLH
10+
#define LINEAR_FP_INPUT_TILE_GLSLH
11+
12+
/*
13+
* Defines the FPInputTile struct, which is used to represent a tile of the
14+
* input matrix of a matrix multiplication operation.
15+
*
16+
* Settings:
17+
* - TILE_M: number of rows in the tile
18+
* - TILE_K4: number of (groups of 4) columns in the tile
19+
*/
20+
21+
// Tile of the input matrix: TILE_M rows, each holding TILE_K4 VEC4_T entries.
struct FPInputTile {
  VEC4_T data[TILE_M][TILE_K4];
};
24+
25+
#ifdef DEBUG_MODE
26+
27+
// Debug helper: prints a header line followed by every VEC4_T entry of
// `in_tile` (row by row), using debugPrintfEXT.
void printFPInputTile(const FPInputTile in_tile) {
  debugPrintfEXT("input_tile: \\n");
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col = 0; col < TILE_K4; ++col) {
      const VEC4_T v = in_tile.data[row][col];
      debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
    }
  }
}
40+
41+
#endif // DEBUG_MODE
42+
43+
#endif // LINEAR_FP_INPUT_TILE_GLSLH
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
/*
10+
* Defines functions to load a FPInputTile from input buffer/texture.
11+
*
12+
* Requires:
13+
* - t_input to be declared in the shader layout (input buffer/texture)
14+
*
15+
* Settings:
16+
* - INPUT_BUFFER to indicate input resource is a buffer, otherwise texture is
17+
* assumed.
18+
*/
19+
20+
#ifndef LINEAR_FP_INPUT_TILE_LOAD_GLSLH
21+
#define LINEAR_FP_INPUT_TILE_LOAD_GLSLH
22+
23+
#extension GL_EXT_control_flow_attributes : require
24+
25+
#include "linear_fp_input_tile.glslh"
26+
27+
#ifdef INPUT_BUFFER

// Buffer variant: reads t_input at linear index (m * ntexels_k) + k4.
VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) {
  const uint row_offset = m * ntexels_k;
  return t_input[row_offset + k4];
}

#else

// Texture variant: fetches the texel at (k4, m, 0) from level 0 of t_input.
// `ntexels_k` is not used by this variant.
VEC4_T load_input_x4(const uint k4, const uint m, const uint ntexels_k) {
  const ivec3 pos = ivec3(k4, m, 0);
  return texelFetch(t_input, pos, 0);
}

#endif // INPUT_BUFFER
40+
41+
// Loads a full TILE_M x TILE_K4 input tile without any bounds checks.
//
// To be used only if (M - m_start >= TILE_M) && (K4 - k4_start >= TILE_K4):
// every row and every column group of the tile is read unconditionally, so
// both conditions must hold for all reads to stay in range.
//
// `K4` is forwarded to load_input_x4 as the row width; `M` is not used by
// this variant.
void load_input_tile_no_checks(
    out FPInputTile in_tile,
    const uint k4_start,
    const uint m_start,
    const uint K4,
    const uint M) {
#if TILE_K4 == 1
  [[unroll]] for (int m = 0; m < TILE_M; ++m) {
    in_tile.data[m][0] = load_input_x4(k4_start, m_start + m, K4);
  }

#else
  [[unroll]] for (int m = 0; m < TILE_M; ++m) {
    [[unroll]] for (int k4 = 0; k4 < TILE_K4; ++k4) {
      in_tile.data[m][k4] = load_input_x4(k4_start + k4, m_start + m, K4);
    }
  }
#endif
}
61+
62+
// Bounds-checked tile load, to be used near tensor boundaries. Tile entries
// that fail the bounds check are filled with VEC4_T(0.0) instead of being
// read from the input.
void load_input_tile_with_checks(
    out FPInputTile in_tile,
    const uint k4_start,
    const uint m_start,
    const uint K4,
    const uint M) {
#if TILE_K4 == 1
  // NOTE(review): this path only checks the row against M; k4_start is not
  // compared with K4. Presumably callers guarantee k4_start < K4 - confirm.
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    if (m_start + row >= M) {
      in_tile.data[row][0] = VEC4_T(0.0);
    } else {
      in_tile.data[row][0] = load_input_x4(k4_start, m_start + row, K4);
    }
  }

#else
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col = 0; col < TILE_K4; ++col) {
      const bool in_bounds = (m_start + row < M) && (k4_start + col < K4);
      if (in_bounds) {
        in_tile.data[row][col] =
            load_input_x4(k4_start + col, m_start + row, K4);
      } else {
        in_tile.data[row][col] = VEC4_T(0.0);
      }
    }
  }
#endif
}
90+
91+
#endif // LINEAR_FP_INPUT_TILE_LOAD_GLSLH
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
/*
10+
* Defines the FPOutTile struct, which is used to represent a tile of the output
11+
* matrix of a matrix multiplication operation.
12+
*
13+
* Settings:
14+
* - TILE_M: number of rows in the output tile
15+
* - TILE_N4: number of (groups of 4) columns in the output tile
16+
*/
17+
18+
#ifndef LINEAR_FP_OUTPUT_TILE_GLSLH
19+
#define LINEAR_FP_OUTPUT_TILE_GLSLH
20+
21+
#extension GL_EXT_control_flow_attributes : require
22+
23+
// Tile of the output matrix: TILE_M rows, each holding TILE_N4 VEC4_T entries.
struct FPOutTile {
  VEC4_T data[TILE_M][TILE_N4];
};
26+
27+
// Sets every entry of `out_tile` to VEC4_T(0).
void initialize(out FPOutTile out_tile) {
#if TILE_M > 1 && TILE_N4 == 1
  [[unroll]] for (int y = 0; y < TILE_M; ++y) {
    out_tile.data[y][0] = VEC4_T(0);
  }

#else
  [[unroll]] for (int y = 0; y < TILE_M; ++y) {
    // The second dimension of FPOutTile::data is TILE_N4, so the inner loop
    // must be bounded by TILE_N4 (not TILE_K4, which sizes the input tile).
    [[unroll]] for (int x4 = 0; x4 < TILE_N4; ++x4) {
      out_tile.data[y][x4] = VEC4_T(0);
    }
  }
#endif
}
41+
42+
#ifdef DEBUG_MODE
43+
44+
// Debug helper: prints a header line followed by every VEC4_T entry of `tile`
// (row by row), using debugPrintfEXT.
void printFPOutputTile(const FPOutTile tile) {
  debugPrintfEXT("output_tile: \\n");
  [[unroll]] for (int row = 0; row < TILE_M; ++row) {
    [[unroll]] for (int col = 0; col < TILE_N4; ++col) {
      const VEC4_T v = tile.data[row][col];
      debugPrintfEXT(" %f, %f, %f, %f, \\n", v.x, v.y, v.z, v.w);
    }
  }
}
57+
58+
#endif // DEBUG_MODE
59+
60+
#endif // LINEAR_FP_OUTPUT_TILE_GLSLH

0 commit comments

Comments
 (0)