Skip to content

Commit 87cc2f9

Browse files
authored
[ET-VK] New Implementation of `permute` operator
Differential Revision: D76483755 Pull Request resolved: #11825
1 parent 0fb4cc1 commit 87cc2f9

File tree

8 files changed

+253
-124
lines changed

8 files changed

+253
-124
lines changed

backends/vulkan/runtime/graph/ops/glsl/permute.glsl

Lines changed: 0 additions & 89 deletions
This file was deleted.
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// Preamble of the buffer-storage permute shader: type macros, tensor and UBO
// bindings, push constants and specialization constants used by main().
// The ${...} expressions are template placeholders (presumably expanded by the
// shader code generator before GLSL compilation -- confirm against the build).
#version 450 core
10+
11+
#define PRECISION ${PRECISION}
12+
13+
// NOTE(review): VEC4_T is not referenced anywhere in the visible buffer shader.
#define VEC4_T ${texel_type(DTYPE)}
14+
#define T ${buffer_scalar_type(DTYPE)}
15+
16+
${define_active_storage_type("buffer")}
17+
${define_required_extensions(DTYPE)}
18+
19+
layout(std430) buffer;
20+
21+
#include "indexing_utils.h"
22+
23+
// Output tensor (declared with "w") and input tensor (declared with "r"),
// both using buffer storage.
${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
24+
${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}
25+
26+
// NOTE(review): in_sizes is declared but not referenced in the visible shader
// body; confirm whether the host-side binding order depends on it before
// removing.
${layout_declare_ubo(B, "ivec4", "in_sizes")}
27+
// out_strides and out_numel are consumed by main() for index conversion and
// for the bounds check on the invocation index.
${layout_declare_ubo(B, "ivec4", "out_strides")}
28+
${layout_declare_ubo(B, "int", "out_numel")}
29+
30+
layout(push_constant) uniform restrict Block {
31+
// Passed to tidx_to_bufi() in main() to locate the source element.
ivec4 in_strides;
32+
ivec4 permute_dims; // Permutation mapping: permute_dims[i] = j means output dim i comes from input dim j
33+
};
34+
35+
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
36+
// NOTE(review): in_layout is not read in the visible buffer shader.
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
37+
38+
// Dim order of the output tensor, decoded from the out_layout constant.
const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);
39+
40+
// Work group size is supplied through specialization constant IDs 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
41+
42+
/*
 * Translate an output tensor index into the input tensor index it is sourced
 * from.
 *
 * For every component i, out_tidx[i] is stored into component permute_dims[i]
 * of the result, where permute_dims comes from the push constant block. Only
 * the components named by permute_dims are written.
 */
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
  ivec4 src_tidx;

  // Scatter each output component into the input slot selected by the
  // permutation.
  for (int dim = 0; dim < 4; ++dim) {
    src_tidx[permute_dims[dim]] = out_tidx[dim];
  }

  return src_tidx;
}
54+
55+
/*
 * Entry point of the buffer permute shader. Each invocation copies a single
 * element: the x component of the global invocation ID is used as the output
 * buffer index, and invocations at or beyond out_numel do nothing.
 */
void main() {
  const int dst_bufi = int(gl_GlobalInvocationID.x);

  if (dst_bufi < out_numel) {
    // Output buffer index -> output tensor index.
    const ivec4 dst_tidx = bufi_to_tidx(dst_bufi, out_strides, out_dim_order);

    // Output tensor index -> input tensor index -> input buffer index.
    const int src_bufi =
        tidx_to_bufi(out_tidx_to_in_tidx(dst_tidx), in_strides);

    t_out[dst_bufi] = t_in[src_bufi];
  }
}
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
permute:
1+
permute_buffer:
22
parameter_names_with_default_values:
33
DTYPE: float
4-
NDIM: 3
5-
STORAGE: texture3d
64
generate_variant_forall:
75
DTYPE:
86
- VALUE: half
97
- VALUE: float
108
- VALUE: int32
119
shader_variants:
12-
- NAME: permute
10+
- NAME: permute_buffer
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
// Preamble of the texture3d-storage permute shader: type macros, tensor
// bindings, push constants and specialization constants used by main().
// The ${...} expressions are template placeholders (presumably expanded by the
// shader code generator before GLSL compilation -- confirm against the build).
#version 450 core
10+
11+
#define PRECISION ${PRECISION}
12+
13+
// VEC4_T is the texel type loaded from t_in and written to t_out; T is the
// scalar type used in main() when picking one component out of a texel.
#define VEC4_T ${texel_type(DTYPE)}
14+
#define T ${buffer_scalar_type(DTYPE)}
15+
16+
${define_active_storage_type("texture3d")}
17+
${define_required_extensions(DTYPE)}
18+
19+
layout(std430) buffer;
20+
21+
#include "indexing_utils.h"
22+
23+
// Output tensor (declared with "w") and input tensor (declared with "r"),
// both using texture3d storage.
${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")}
24+
${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")}
25+
26+
layout(push_constant) uniform restrict Block {
27+
// Used by main() for the bounds check and for lpos_to_tidx().
ivec4 out_sizes;
28+
// Passed to tidx_to_pos() when locating the source texel.
ivec4 in_sizes;
29+
ivec4 permute_dims; // Permutation mapping: permute_dims[i] = j means output dim i comes from input dim j
30+
};
31+
32+
// Axis map and packed dimension of the output, decoded from out_layout.
${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")}
33+
const lowp ivec4 out_axis_map = unhash_axis_map(out_layout);
34+
const lowp int out_packed_dim = unhash_packed_dim(out_layout);
35+
36+
// Axis map and packed dimension of the input, decoded from in_layout.
${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")}
37+
const lowp ivec4 in_axis_map = unhash_axis_map(in_layout);
38+
const lowp int in_packed_dim = unhash_packed_dim(in_layout);
39+
40+
// Work group size is supplied through specialization constant IDs 0, 1 and 2.
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
41+
42+
/*
 * Translate an output tensor index into the input tensor index it is sourced
 * from.
 *
 * For every component i, out_tidx[i] is stored into component permute_dims[i]
 * of the result, where permute_dims comes from the push constant block. Only
 * the components named by permute_dims are written.
 */
ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) {
  ivec4 src_tidx;

  // Scatter each output component into the input slot selected by the
  // permutation.
  for (int dim = 0; dim < 4; ++dim) {
    src_tidx[permute_dims[dim]] = out_tidx[dim];
  }

  return src_tidx;
}
54+
55+
/*
 * Reports whether whole texels can be copied from input to output unchanged.
 *
 * That is the case when the permutation maps the output's packed dimension
 * onto the input's packed dimension, so the four elements of an output texel
 * lie along the same dimension as the four elements of an input texel.
 */
bool can_use_fast_path() {
  // Input dimension that feeds the output's packed dimension.
  const int src_dim_of_out_packed = permute_dims[out_packed_dim];
  return src_dim_of_out_packed == in_packed_dim;
}
65+
66+
/*
 * Entry point of the texture permute shader. Each invocation produces one
 * output texel, identified by the logical position taken from the global
 * invocation ID. Invocations whose tensor index lies outside out_sizes exit
 * without writing.
 *
 * Fast path: when the packed dimension is preserved by the permutation
 * (see can_use_fast_path), the matching input texel is copied as a whole.
 *
 * Slow path: otherwise each of the four elements of the output texel is
 * fetched individually, walking out_tidx along the output packed dimension.
 */
void main() {
  const ivec3 lpos = ivec3(gl_GlobalInvocationID);
  ivec4 out_tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, out_packed_dim);

  if (any(greaterThanEqual(out_tidx, out_sizes))) {
    return;
  }

  if (can_use_fast_path()) {
    // Fast path: packed dimension is preserved, so we can copy texels directly
    ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
    ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
    VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));

    write_texel_lpos(t_out, lpos, in_texel, out_axis_map);
  }
  else {
    // Slow path: packed dimension is not preserved, so each element of the
    // output texel may be "sourced" from a different texel in the input tensor.
    // Therefore each output texel element is processed individually.
    VEC4_T out_texel = VEC4_T(0);

    for (int texel_i = 0; texel_i < 4; ++texel_i) {
      // Stop at the logical end of the packed dimension. When its size is not
      // a multiple of 4, the trailing lanes of the last texel are padding;
      // the permuted index for them would fall outside the input tensor's
      // sizes, so no load is issued and those lanes keep the zero they were
      // initialized with.
      if (out_tidx[out_packed_dim] >= out_sizes[out_packed_dim]) {
        break;
      }

      ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx);
      ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim);
      // Lane of the input texel that holds the wanted element.
      int element_idx = in_tidx[in_packed_dim] % 4;

      VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos));
      T selected_value = T(in_texel[element_idx]);

      out_texel[texel_i] = selected_value;

      // Advance to the next element along the output packed dimension.
      out_tidx[out_packed_dim]++;
    }

    write_texel_lpos(t_out, lpos, out_texel, out_axis_map);
  }
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Variant descriptor for the texture permute shader template.
# NOTE(review): the exact semantics of each key are defined by the shader
# code generator, which is not visible here -- confirm before relying on the
# descriptions below.
permute_texture:
2+
# Template parameters and their default values.
parameter_names_with_default_values:
3+
DTYPE: float
4+
# Presumably one shader variant is generated for each listed DTYPE value.
generate_variant_forall:
5+
DTYPE:
6+
- VALUE: half
7+
- VALUE: float
8+
- VALUE: int32
9+
shader_variants:
10+
# Note the name differs from the top-level key (permute_texture vs
# permute_texture3d).
- NAME: permute_texture3d

0 commit comments

Comments
 (0)