Skip to content

Commit ece0f49

Browse files
author
ssjia
committed
[ET-VK] Conv2d quantize/dequantize ops for conv2d activations
## Context As title. Add shaders to quantize a floating-point conv2d input tensor to a packed int8 memory layout and to dequantize an int8 conv2d output tensor back to a floating-point representation. Hooking it up to the export logic will be handled in a follow-up diff. Differential Revision: [D82542335](https://our.internmc.facebook.com/intern/diff/D82542335/) [ghstack-poisoned]
1 parent 1ca431c commit ece0f49

16 files changed

+867
-13
lines changed

backends/vulkan/runtime/graph/ops/glsl/common.glslh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,30 @@ struct TensorIndex4D {
3333
ivec4 data;
3434
};
3535

36+
/*
 * Interprets the low 8 bits of `val` as a two's-complement int8 and returns
 * it widened to a 32-bit signed integer in the range [-128, 127].
 *
 * Any bits of `val` above bit 7 are ignored, so the result is a valid int8
 * value even if the caller did not mask the input beforehand.
 */
int sign_extend_8bit(const int val) {
  // Flip the sign bit, then subtract its weight: [0x00, 0x7F] maps to itself
  // and [0x80, 0xFF] maps to [-128, -1], without a branch.
  return ((val & 0xFF) ^ 0x80) - 0x80;
}
42+
43+
/*
 * Returns the `i`-th signed 8-bit value stored in `packed`.
 *
 * Bytes are counted from the least significant end: i == 0 selects bits
 * [0, 7], i == 1 selects bits [8, 15], and so on. The selected byte is
 * sign-extended via sign_extend_8bit().
 */
int extract_8bit_from_packed_int_le(const int packed, const int i) {
  const int shift = 8 * i;
  // Mask after shifting so that only the selected byte reaches the
  // sign-extension helper.
  const int raw_byte = (packed >> shift) & 0xFF;
  return sign_extend_8bit(raw_byte);
}
48+
49+
/*
 * Packs the low 8 bits of four integers into a single 32-bit integer.
 *
 * `val0` lands in bits [0, 7], `val1` in bits [8, 15], `val2` in bits
 * [16, 23] and `val3` in bits [24, 31]. Bits above bit 7 of each input are
 * discarded.
 */
int pack_4xqint_into_int32(
    const int val0,
    const int val1,
    const int val2,
    const int val3) {
  int packed = val0 & 0xFF;
  packed |= (val1 & 0xFF) << 8;
  packed |= (val2 & 0xFF) << 16;
  packed |= (val3 & 0xFF) << 24;
  return packed;
}
59+
3660
#ifdef DEBUG_MODE
3761

3862
#extension GL_EXT_debug_printf : require

backends/vulkan/runtime/graph/ops/glsl/conv2d_common.glslh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,48 @@ struct Conv2DParams {
2727
int K4;
2828
};
2929

30+
// Index into a conv2d activation tensor. `data` is the 3-component integer
// coordinate that the fp activation load/store helpers hand to texelFetch /
// imageStore.
// NOTE(review): `texel_i` presumably selects a component within the addressed
// texel; the only writer visible here sets it to 0 - confirm against callers.
struct Conv2dTensorIndex {
31+
ivec3 data;
32+
int texel_i;
33+
};
34+
35+
// Index of one block of a conv2d activation tensor. block_idx_to_tensor_idx()
// converts it to a tensor index by passing `data.x` through mul_4() and
// copying `data.y` / `data.z` unchanged.
struct Conv2dBlockIndex {
36+
ivec3 data;
37+
};
38+
39+
/*
 * Converts a block index into the tensor index of the first texel of that
 * block: the x coordinate is scaled with mul_4(), y and z are carried over
 * as-is, and `texel_i` is reset to 0.
 */
Conv2dTensorIndex block_idx_to_tensor_idx(const Conv2dBlockIndex block_idx) {
  Conv2dTensorIndex first_texel;
  first_texel.data =
      ivec3(mul_4(block_idx.data.x), block_idx.data.y, block_idx.data.z);
  first_texel.texel_i = 0;
  return first_texel;
}
47+
48+
// Extents of a conv2d activation tensor measured in blocks. `data` holds the
// per-axis block counts and `data_xz` caches data.x * data.z (see
// make_block_extents), which the packed int8 buffer helpers use as the stride
// of the y index when flattening a block index.
struct Conv2dBlockExtents {
49+
ivec3 data;
50+
int data_xz;
51+
};
52+
53+
/*
 * Builds the block extents for a tensor with the given sizes.
 *
 * The x and z sizes are each passed through div_up_4(); the y size is used
 * unchanged. `data_xz` is precomputed as the product of the x and z block
 * counts. `tensor_sizes.w` is not read.
 */
Conv2dBlockExtents make_block_extents(const ivec4 tensor_sizes) {
  const int blocks_x = div_up_4(tensor_sizes.x);
  const int blocks_z = div_up_4(tensor_sizes.z);

  Conv2dBlockExtents extents;
  extents.data = ivec3(blocks_x, tensor_sizes.y, blocks_z);
  extents.data_xz = blocks_x * blocks_z;
  return extents;
}
63+
64+
/*
 * Returns true when any component of `block_idx` is greater than or equal to
 * the matching component of `block_extents.data`. Negative components are not
 * checked.
 */
bool block_idx_out_of_bounds(
    const Conv2dBlockIndex block_idx,
    const Conv2dBlockExtents block_extents) {
  // Component-wise `>=` on all three axes, reduced with a logical OR.
  return any(greaterThanEqual(block_idx.data, block_extents.data));
}
71+
3072
#ifdef DEBUG_MODE
3173

3274
void printConv2DParams(const Conv2DParams params) {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef CONV2D_FP_ACTIVATION_BLOCK
10+
#define CONV2D_FP_ACTIVATION_BLOCK
11+
12+
#extension GL_EXT_control_flow_attributes : require
13+
14+
// A block of four VEC4_T texels. The fp activation load/store helpers fill or
// write data[0..3] from four consecutive x texel coordinates.
struct FPActivationBlock {
15+
VEC4_T data[4];
16+
};
17+
18+
#ifdef DEBUG_MODE
19+
20+
#extension GL_EXT_debug_printf : require
21+
22+
/*
 * Debug helper: prints a header line followed by the four texels of
 * `activation_block`, one texel (x, y, z, w) per line, via debugPrintfEXT.
 */
void printFPActivationBlock(const FPActivationBlock activation_block) {
  debugPrintfEXT("fp activation_block: \\n");
  [[unroll]] for (int row = 0; row < 4; ++row) {
    const VEC4_T texel = activation_block.data[row];
    debugPrintfEXT(
        "  [%d]: %f, %f, %f, %f, \\n", row, texel.x, texel.y, texel.z, texel.w);
  }
}
34+
35+
#endif // DEBUG_MODE
36+
37+
#endif // CONV2D_FP_ACTIVATION_BLOCK
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef CONV2D_FP_ACTIVATION_BLOCK_LOAD
10+
#define CONV2D_FP_ACTIVATION_BLOCK_LOAD
11+
12+
#extension GL_EXT_control_flow_attributes : require
13+
14+
#include "conv2d_fp_activation_block.glslh"
15+
16+
/*
 * Fetches one texel from `t_fp_input` at the coordinate `tidx.data`, using
 * mip level 0. `tidx.texel_i` is not consulted.
 */
VEC4_T load_fp_input_texel(const Conv2dTensorIndex tidx) {
  const ivec3 texel_pos = tidx.data;
  const int lod = 0;
  return texelFetch(t_fp_input, texel_pos, lod);
}
19+
20+
/*
 * Fills `block` with four texels read from the fp input.
 *
 * The first texel is the one block_idx_to_tensor_idx() reports for
 * `block_idx`; texel w is read from that position with the x coordinate
 * advanced by w. No bounds check is performed here.
 */
void load_fp_input_block(
    out FPActivationBlock block,
    const Conv2dBlockIndex block_idx) {
  const Conv2dTensorIndex first_tidx = block_idx_to_tensor_idx(block_idx);
  [[unroll]] for (int w = 0; w < 4; w++) {
    Conv2dTensorIndex tidx = first_tidx;
    tidx.data.x += w;
    block.data[w] = load_fp_input_texel(tidx);
  }
}
29+
30+
#endif // CONV2D_FP_ACTIVATION_BLOCK_LOAD
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef CONV2D_FP_ACTIVATION_BLOCK_STORE
10+
#define CONV2D_FP_ACTIVATION_BLOCK_STORE
11+
12+
#extension GL_EXT_control_flow_attributes : require
13+
14+
#include "conv2d_fp_activation_block.glslh"
15+
16+
/*
 * Writes `out_texel` to `t_fp_output` at the coordinate `tidx.data`.
 * `tidx.texel_i` is not consulted.
 */
void store_fp_output_texel(
    const Conv2dTensorIndex tidx,
    const VEC4_T out_texel) {
  const ivec3 texel_pos = tidx.data;
  imageStore(t_fp_output, texel_pos, out_texel);
}
21+
22+
/*
 * Writes the four texels of `block` to the fp output.
 *
 * Texel w is stored at the position block_idx_to_tensor_idx() reports for
 * `block_idx`, with the x coordinate advanced by w. No bounds check is
 * performed here.
 */
void store_fp_activation_block(
    const FPActivationBlock block,
    const Conv2dBlockIndex block_idx) {
  const Conv2dTensorIndex first_tidx = block_idx_to_tensor_idx(block_idx);
  [[unroll]] for (int w = 0; w < 4; w++) {
    Conv2dTensorIndex tidx = first_tidx;
    tidx.data.x += w;
    store_fp_output_texel(tidx, block.data[w]);
  }
}
31+
32+
#endif // CONV2D_FP_ACTIVATION_BLOCK_STORE
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef CONV2D_INT8_ACTIVATION_BLOCK
10+
#define CONV2D_INT8_ACTIVATION_BLOCK
11+
12+
#extension GL_EXT_control_flow_attributes : require
13+
14+
#include "common.glslh"
15+
#include "conv2d_fp_activation_block.glslh"
16+
17+
// Quantized counterpart of FPActivationBlock. Each component of `data` holds
// the four 8-bit values of one FPActivationBlock texel, packed into a single
// 32-bit int by pack_4xqint_into_int32().
struct Int8ActivationBlock {
18+
ivec4 data;
19+
};
20+
21+
/*
 * Quantizes the four components of `val` to signed 8-bit values.
 *
 * Each component is computed as round(val * q_inv_scale) + q_zero_point and
 * then saturated to [-128, 127]. The results are returned as 32-bit ints, one
 * per component.
 */
ivec4 quantize_to_8bit(
    const VEC4_T val,
    const float q_inv_scale,
    const int q_zero_point) {
  const vec4 quantized = round(vec4(val) * q_inv_scale) + q_zero_point;
  // Saturate while still in floating point: converting a float that does not
  // fit in a 32-bit int yields an undefined result, so the clamp must happen
  // before the conversion rather than after it. For values that do fit, the
  // result is the same as converting first.
  return ivec4(clamp(quantized, -128.0, 127.0));
}
28+
29+
/*
 * Quantizes every texel of `in_block` and packs the result into `out_block`.
 *
 * Texel i is quantized with quantize_to_8bit() and its four 8-bit results are
 * packed, component x in the lowest byte, into `out_block.data[i]`.
 */
void quantize_fp_activation_block(
    out Int8ActivationBlock out_block,
    const FPActivationBlock in_block,
    const float q_inv_scale,
    const int q_zero_point) {
  [[unroll]] for (int i = 0; i < 4; ++i) {
    const ivec4 q =
        quantize_to_8bit(in_block.data[i], q_inv_scale, q_zero_point);
    out_block.data[i] = pack_4xqint_into_int32(q.x, q.y, q.z, q.w);
  }
}
44+
45+
/*
 * Dequantizes four integer values: subtracts `q_zero_point` from each
 * component of `val`, converts the result to VEC4_T and multiplies it by
 * `q_scale`.
 */
VEC4_T
dequantize_8bit(const ivec4 val, const float q_scale, const int q_zero_point) {
  // The zero point is removed in integer arithmetic, before the conversion.
  const ivec4 centered = val - q_zero_point;
  return VEC4_T(centered) * q_scale;
}
49+
50+
/*
 * Unpacks and dequantizes every packed texel of `in_block` into `out_block`.
 *
 * For each i, the four signed bytes of `in_block.data[i]` are extracted
 * (lowest byte first) and dequantized with dequantize_8bit() to produce
 * `out_block.data[i]`.
 */
void dequantize_int8_activation_block(
    out FPActivationBlock out_block,
    const Int8ActivationBlock in_block,
    const float q_scale,
    const int q_zero_point) {
  [[unroll]] for (int i = 0; i < 4; ++i) {
    const int packed = in_block.data[i];
    const ivec4 unpacked = ivec4(
        extract_8bit_from_packed_int_le(packed, 0),
        extract_8bit_from_packed_int_le(packed, 1),
        extract_8bit_from_packed_int_le(packed, 2),
        extract_8bit_from_packed_int_le(packed, 3));
    out_block.data[i] = dequantize_8bit(unpacked, q_scale, q_zero_point);
  }
}
67+
68+
#ifdef DEBUG_MODE
69+
70+
/*
 * Debug helper: prints a header line followed by the four packed ints of
 * `block`, each expanded into its four signed bytes (lowest byte first), via
 * debugPrintfEXT.
 */
void printInt8ActivationBlock(const Int8ActivationBlock block) {
  debugPrintfEXT("Int8ActivationBlock: \\n");
  [[unroll]] for (int row = 0; row < 4; ++row) {
    const int packed = block.data[row];
    const int b0 = extract_8bit_from_packed_int_le(packed, 0);
    const int b1 = extract_8bit_from_packed_int_le(packed, 1);
    const int b2 = extract_8bit_from_packed_int_le(packed, 2);
    const int b3 = extract_8bit_from_packed_int_le(packed, 3);
    debugPrintfEXT("  [%d]: %d, %d, %d, %d, \\n", row, b0, b1, b2, b3);
  }
}
83+
84+
#endif // DEBUG_MODE
85+
86+
#endif // CONV2D_INT8_ACTIVATION_BLOCK
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef CONV2D_INT8_INPUT_BLOCK_STORE
10+
#define CONV2D_INT8_INPUT_BLOCK_STORE
11+
12+
#extension GL_EXT_control_flow_attributes : require
13+
14+
#include "conv2d_common.glslh"
15+
#include "conv2d_int8_activation_block.glslh"
16+
17+
/*
 * Writes one packed int8 block to `t_packed_int8_input`.
 *
 * When OUTPUT_BUFFER is defined the destination is indexed as a flat array:
 * the y index is the slowest-varying, then x, then z. Otherwise the block is
 * written with imageStore at `block_idx.data`, and `block_extents` is unused.
 */
void store_packed_int8_input_block(
    const Conv2dBlockIndex block_idx,
    const Conv2dBlockExtents block_extents,
    const Int8ActivationBlock packed_int8_block) {
#ifdef OUTPUT_BUFFER
  const int y_offset = block_idx.data.y * block_extents.data_xz;
  const int x_offset = block_idx.data.x * block_extents.data.z;
  const int buffer_idx = y_offset + x_offset + block_idx.data.z;
  t_packed_int8_input[buffer_idx] = packed_int8_block.data;
#else
  imageStore(t_packed_int8_input, block_idx.data, packed_int8_block.data);
#endif
}
29+
30+
#endif // CONV2D_INT8_INPUT_BLOCK_STORE
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// This header defines the packed int8 output block *load* helper, so the
// include guard is named accordingly; a *_STORE guard here would silently
// suppress whichever of the two headers is included second if an output-store
// header ever uses that name.
#ifndef CONV2D_INT8_OUTPUT_BLOCK_LOAD
#define CONV2D_INT8_OUTPUT_BLOCK_LOAD

#extension GL_EXT_control_flow_attributes : require

#include "conv2d_common.glslh"
#include "conv2d_int8_activation_block.glslh"

/*
 * Reads one packed int8 block from `t_packed_int8_output`.
 *
 * When INPUT_BUFFER is defined the source is indexed as a flat array: the y
 * index is the slowest-varying, then x, then z. Otherwise the block is read
 * with texelFetch at `block_idx.data` (mip level 0), and `block_extents` is
 * unused.
 */
ivec4 load_packed_int8_output_block(
    const Conv2dBlockIndex block_idx,
    const Conv2dBlockExtents block_extents) {
#ifdef INPUT_BUFFER
  const int buffer_idx = block_idx.data.y * block_extents.data_xz +
      block_idx.data.x * block_extents.data.z + block_idx.data.z;
  return t_packed_int8_output[buffer_idx];
#else
  return texelFetch(t_packed_int8_output, block_idx.data, 0);
#endif
}

#endif // CONV2D_INT8_OUTPUT_BLOCK_LOAD

backends/vulkan/runtime/graph/ops/glsl/linear_common.glslh

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,6 @@
1616

1717
#include "common.glslh"
1818

19-
int sign_extend_8bit(const int val) {
20-
if ((val & 0x80) != 0) {
21-
return val | (~0xFF);
22-
}
23-
return val;
24-
}
25-
26-
int extract_8bit_from_packed_int_le(const int packed, const int i) {
27-
// account for little endian
28-
int byte = sign_extend_8bit(packed >> (8 * i) & 0xFF);
29-
return byte;
30-
}
31-
3219
// Extract a 4-bit value from a packed int (little endian)
3320
// It is assumed that the 4-bit value is in the range [0, 15]
3421
int extract_4bit_from_packed_int_le(const int packed, const int col) {

0 commit comments

Comments
 (0)