Skip to content

Commit 053193f

Browse files
SS-JIA and ssjia authored
[ET-VK][ez] Migrate slice/select shaders to use BufferMetadata/TextureMetadata (pytorch#15796)
Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom):

* pytorch#15829
* __->__ pytorch#15796
* pytorch#15795
* pytorch#15794
* pytorch#15793

Title says it all!

Motivation: simplifies the code and allows these ops to handle high-dimensional tensors.

Differential Revision: [D86910641](https://our.internmc.facebook.com/intern/diff/D86910641/)

---------

Co-authored-by: ssjia <[email protected]>
1 parent 89e2c5d commit 053193f

File tree

5 files changed

+129
-131
lines changed

5 files changed

+129
-131
lines changed

backends/vulkan/runtime/graph/ops/glsl/select.glslh

Lines changed: 49 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -9,70 +9,87 @@
99
#ifndef SELECT_GLSLH
1010
#define SELECT_GLSLH
1111

12-
#ifndef USING_BUFFER
12+
#ifdef USING_BUFFER
1313

1414
/*
15-
* Enable the fast path if a texel loaded from the input texture can be used as
16-
* is to store to the output texture. The following conditions must be met:
15+
* Converts output tensor indices to input tensor indices for the select operation
16+
* on buffer storage.
1717
*
18-
* 1. The input and output textures have the same packed dimension.
19-
* 2. The selected_dim must not be the packed dimension of the input.
20-
* 3. The packed dimension of the input must "map" to the packed dimension of
21-
* the output. This occurs if selected_dim is greater than the packed dimension
22-
* of the input.
18+
* This is done by "inserting" the select index at the selected_dim in the input
19+
* tensor index.
20+
*
21+
* Parameters assumed to be defined:
22+
* - inp: BufferMetadata
23+
* - selected_dim
24+
* - index
2325
*/
24-
bool can_use_fast_path() {
25-
if (out_packed_dim != in_packed_dim) {
26-
return false;
26+
TensorIndex out_tidx_to_in_tidx(const TensorIndex out_tidx) {
27+
TensorIndex in_tidx;
28+
initialize(in_tidx);
29+
30+
int in_size = int(size_at(inp, selected_dim));
31+
int adjusted_index = index;
32+
if (index < 0) {
33+
adjusted_index = index + in_size;
2734
}
28-
if (selected_dim <= in_packed_dim) {
29-
return false;
35+
36+
// Copy indices before selected_dim
37+
for (int d = 0; d < selected_dim; d++) {
38+
in_tidx.data[div_4(d)][mod_4(d)] = idx_at(out_tidx, d);
3039
}
31-
return true;
40+
41+
// Insert the selected index
42+
in_tidx.data[div_4(selected_dim)][mod_4(selected_dim)] = adjusted_index;
43+
44+
// Copy indices after selected_dim (shifted by 1)
45+
for (int d = selected_dim; d < int_ndim(inp) - 1; d++) {
46+
in_tidx.data[div_4(d + 1)][mod_4(d + 1)] = idx_at(out_tidx, d);
47+
}
48+
49+
return in_tidx;
3250
}
3351

34-
#endif // USING_BUFFER
52+
#else // texture storage
3553

3654
/*
37-
* Given an output tensor index, return the corresponding input tensor index for
38-
* the select operator. This is done by "inserting" the select index at the
39-
* selected_dim in the input tensor index.
55+
* Converts output tensor indices to input tensor indices for the select operation
56+
* on texture storage.
4057
*
41-
* A simple example is (note all tensor index are in WHCN order):
42-
* out_tidx = [7, 5, 9]
43-
* selected_dim = 2
44-
* index = 3
45-
* in_tidx = [7, 3, 5, 9]
58+
* This is done by "inserting" the select index at the selected_dim in the input
59+
* tensor index.
4660
*
47-
* This function assumes that the following variables are defined in the layout:
48-
* - in_sizes
61+
* Parameters assumed to be defined:
62+
* - inp: TextureMetadata
4963
* - selected_dim
5064
* - index
5165
*/
52-
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
53-
ivec4 in_tidx = ivec4(0);
66+
TensorIndex4D out_tidx_to_in_tidx(const TensorIndex4D out_tidx) {
67+
TensorIndex4D in_tidx;
68+
in_tidx.data = ivec4(0);
5469

5570
int adjusted_index = index;
5671
if (index < 0) {
57-
adjusted_index = index + in_sizes[selected_dim];
72+
adjusted_index = index + inp.sizes[selected_dim];
5873
}
5974

6075
// Handle different dimensions for selection
6176
if (selected_dim == 0) {
6277
// Select from width dimension
63-
in_tidx = ivec4(adjusted_index, out_tidx.x, out_tidx.y, out_tidx.z);
78+
in_tidx.data = ivec4(adjusted_index, out_tidx.data.x, out_tidx.data.y, out_tidx.data.z);
6479
} else if (selected_dim == 1) {
6580
// Select from height dimension
66-
in_tidx = ivec4(out_tidx.x, adjusted_index, out_tidx.y, out_tidx.z);
81+
in_tidx.data = ivec4(out_tidx.data.x, adjusted_index, out_tidx.data.y, out_tidx.data.z);
6782
} else if (selected_dim == 2) {
6883
// Select from channel dimension
69-
in_tidx = ivec4(out_tidx.x, out_tidx.y, adjusted_index, out_tidx.z);
84+
in_tidx.data = ivec4(out_tidx.data.x, out_tidx.data.y, adjusted_index, out_tidx.data.z);
7085
} else if (selected_dim == 3) {
7186
// Select from batch dimension
72-
in_tidx = ivec4(out_tidx.x, out_tidx.y, out_tidx.z, adjusted_index);
87+
in_tidx.data = ivec4(out_tidx.data.x, out_tidx.data.y, out_tidx.data.z, adjusted_index);
7388
}
7489

7590
return in_tidx;
7691
}
7792

93+
#endif // USING_BUFFER
94+
7895
#endif // SELECT_GLSLH

backends/vulkan/runtime/graph/ops/glsl/slice.glslh

Lines changed: 35 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -9,49 +9,61 @@
99
#ifndef SLICE_GLSLH
1010
#define SLICE_GLSLH
1111

12-
#ifndef USING_BUFFER
12+
#include "indexing.glslh"
1313

14-
/**
15-
* Enable the fast path if a texel loaded from the input texture can be used as
16-
* is to store to the output texture. The following conditions must be met:
14+
#ifdef USING_BUFFER
15+
16+
/*
17+
* Converts output tensor indices to input tensor indices for the slice operation
18+
* on buffer storage.
1719
*
18-
* 1. The input and output textures have the same packed dimension.
19-
* 2. The select_dim must not be the packed dimension of the input.
20+
* Parameters assumed to be defined:
21+
* - inp: BufferMetadata
22+
* - selected_dim
23+
* - start
24+
* - step
2025
*/
21-
bool can_use_fast_path() {
22-
if (out_packed_dim != in_packed_dim) {
23-
return false;
24-
}
25-
if (in_packed_dim == selected_dim) {
26-
return false;
26+
TensorIndex out_tidx_to_in_tidx(const TensorIndex out_tidx) {
27+
TensorIndex in_tidx = out_tidx;
28+
29+
int in_size = int(size_at(inp, selected_dim));
30+
int adjusted_start = start;
31+
if (start < 0) {
32+
adjusted_start = start + in_size;
2733
}
28-
return true;
34+
35+
uint out_idx = idx_at(out_tidx, selected_dim);
36+
in_tidx.data[div_4(selected_dim)][mod_4(selected_dim)] =
37+
adjusted_start + int(out_idx) * step;
38+
39+
return in_tidx;
2940
}
3041

31-
#endif // USING_BUFFER
42+
#else // texture storage
3243

3344
/*
34-
* Converts output tensor indices to input tensor indices for the slice operation.
35-
* This function maps the output indices to the corresponding input indices based on
36-
* the slice parameters (start, step, selected_dim).
45+
* Converts output tensor indices to input tensor indices for the slice operation
46+
* on texture storage.
3747
*
38-
* Parameters assumed to be defined in the layout specifier:
39-
* - in_sizes
48+
* Parameters assumed to be defined:
49+
* - inp: TextureMetadata
4050
* - selected_dim
4151
* - start
4252
* - step
4353
*/
44-
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
45-
ivec4 in_tidx = out_tidx;
54+
TensorIndex4D out_tidx_to_in_tidx(const TensorIndex4D out_tidx) {
55+
TensorIndex4D in_tidx = out_tidx;
4656

4757
int adjusted_start = start;
4858
if (start < 0) {
49-
adjusted_start = start + in_sizes[selected_dim];
59+
adjusted_start = start + inp.sizes[selected_dim];
5060
}
5161

52-
in_tidx[selected_dim] = adjusted_start + out_tidx[selected_dim] * step;
62+
in_tidx.data[selected_dim] = adjusted_start + out_tidx.data[selected_dim] * step;
5363

5464
return in_tidx;
5565
}
5666

67+
#endif // USING_BUFFER
68+
5769
#endif // SLICE_GLSLH

backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -11,18 +11,23 @@
1111
#define PRECISION ${PRECISION}
1212
#define UBO_PARAMS ${UBO_PARAMS}
1313

14-
#define VEC4_T ${texel_type(DTYPE)}
1514
#define T ${buffer_scalar_type(DTYPE)}
1615

1716
${define_active_storage_type("buffer")}
1817
${define_required_extensions(DTYPE)}
1918

19+
#extension GL_EXT_control_flow_attributes : require
20+
2021
layout(std430) buffer;
2122

22-
#include "indexing_utils.h"
23+
#include "indexing.glslh"
24+
2325
${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
2426
${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}
2527

28+
${layout_declare_ubo(B, "BufferMetadata", "outp")}
29+
${layout_declare_ubo(B, "BufferMetadata", "inp")}
30+
2631
$if UBO_PARAMS:
2732
$if OP_NAME == "slice":
2833
${layout_declare_ubo(B, "int", "start")}
@@ -32,10 +37,6 @@ $if UBO_PARAMS:
3237
${layout_declare_ubo(B, "int", "index")}
3338

3439
layout(push_constant) uniform restrict Block {
35-
ivec4 in_sizes;
36-
ivec4 out_strides;
37-
ivec4 in_strides;
38-
int out_numel;
3940
int selected_dim;
4041
$if not UBO_PARAMS:
4142
$if OP_NAME == "slice":
@@ -46,24 +47,19 @@ layout(push_constant) uniform restrict Block {
4647
int index;
4748
};
4849

49-
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
50-
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
51-
52-
const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);
53-
5450
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
5551

5652
#include "${OP_NAME}.glslh"
5753

5854
void main() {
59-
const int out_bufi = ivec3(gl_GlobalInvocationID).x;
60-
if (out_bufi >= out_numel) {
55+
const uint out_bufi = gl_GlobalInvocationID.x;
56+
if (out_of_bounds(out_bufi, outp)) {
6157
return;
6258
}
6359

64-
const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
65-
ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
60+
TensorIndex out_tidx = linear_idx_to_tensor_idx(outp, out_bufi);
61+
TensorIndex in_tidx = out_tidx_to_in_tidx(out_tidx);
6662

67-
const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
63+
const uint in_bufi = tensor_idx_to_linear_idx(inp, in_tidx);
6864
t_out[out_bufi] = t_in[in_bufi];
6965
}

backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl

Lines changed: 24 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,25 @@
1111
#define PRECISION ${PRECISION}
1212
#define UBO_PARAMS ${UBO_PARAMS}
1313

14-
#define VEC4_T ${texel_type(DTYPE)}
15-
#define T ${buffer_scalar_type(DTYPE)}
14+
#define VEC4_T ${texel_load_type(DTYPE, "texture3d")}
15+
#define T ${texel_load_component_type(DTYPE, "texture3d")}
1616

1717
${define_active_storage_type("texture3d")}
1818
${define_required_extensions(DTYPE)}
1919

20+
#extension GL_EXT_control_flow_attributes : require
21+
2022
layout(std430) buffer;
2123

22-
#include "indexing_utils.h"
24+
#include "common.glslh"
25+
#include "indexing.glslh"
2326

2427
${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")}
2528
${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")}
2629

30+
${layout_declare_ubo(B, "TextureMetadata", "outp")}
31+
${layout_declare_ubo(B, "TextureMetadata", "inp")}
32+
2733
$if UBO_PARAMS:
2834
$if OP_NAME == "slice":
2935
${layout_declare_ubo(B, "int", "start")}
@@ -33,8 +39,6 @@ $if UBO_PARAMS:
3339
${layout_declare_ubo(B, "int", "index")}
3440

3541
layout(push_constant) uniform restrict Block {
36-
ivec4 out_sizes;
37-
ivec4 in_sizes;
3842
int selected_dim;
3943
$if not UBO_PARAMS:
4044
$if OP_NAME == "slice":
@@ -45,48 +49,33 @@ layout(push_constant) uniform restrict Block {
4549
int index;
4650
};
4751

48-
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
49-
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
50-
const lowp int out_packed_dim = unhash_packed_dim(out_layout);
51-
52-
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
53-
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
54-
const lowp int in_packed_dim = unhash_packed_dim(in_layout);
55-
5652
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
5753

5854
#include "${OP_NAME}.glslh"
5955

6056
void main() {
61-
const ivec3 lpos = ivec3(gl_GlobalInvocationID);
62-
ivec4 out_tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, out_packed_dim);
57+
const ivec3 out_pos = ivec3(gl_GlobalInvocationID);
6358

64-
if (any(greaterThanEqual(out_tidx, out_sizes))) {
59+
if (out_of_bounds(out_pos, outp)) {
6560
return;
6661
}
6762

68-
if (can_use_fast_path()) {
69-
ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
70-
ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
71-
VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));
63+
TensorIndex4D out_tidx = texture_pos_to_tensor4d_idx_simple(outp, out_pos);
64+
VEC4_T out_texel = VEC4_T(0);
7265

73-
write_texel_lpos(t_out, lpos, in_texel, out_axis_map);
74-
}
75-
else {
76-
VEC4_T out_texel = VEC4_T(0);
77-
for (int texel_i = 0; texel_i < 4; ++texel_i) {
78-
ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
79-
ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
80-
int element_idx = in_tidx[in_packed_dim] % 4;
81-
82-
VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));
83-
T selected_value = T(in_texel[element_idx]);
66+
int limit = min(
67+
4, outp.sizes[outp.packed_dim] - out_tidx.data[outp.packed_dim]);
68+
for (int comp = 0; comp < limit; comp++) {
69+
TensorIndex4D in_tidx = out_tidx_to_in_tidx(out_tidx);
8470

85-
out_texel[texel_i] = selected_value;
71+
TextureElementIndex in_elem_pos = tensor4d_idx_to_texture_element_idx_simple(
72+
inp, in_tidx);
8673

87-
out_tidx[out_packed_dim]++;
88-
}
74+
VEC4_T in_texel = texelFetch(t_in, in_elem_pos.pos, 0);
75+
out_texel[comp] = in_texel[in_elem_pos.comp];
8976

90-
write_texel_lpos(t_out, lpos, out_texel, out_axis_map);
77+
out_tidx.data[outp.packed_dim]++;
9178
}
79+
80+
imageStore(t_out, out_pos, out_texel);
9281
}

0 commit comments

Comments
 (0)