Skip to content

Commit 69bdf4c

Browse files
committed
Update base for Update on "[ET-VK] Add support for binary symint ops"
## Changes

* Add an implementation for binary operators which add symbolic integers.

## Motivation

Support executing llama models with dynamic shapes. This operator shows up when exporting with dynamic shapes.

Differential Revision: [D75238029](https://our.internmc.facebook.com/intern/diff/D75238029/)

[ghstack-poisoned]
1 parent 801ea2f commit 69bdf4c

21 files changed

+128
-118
lines changed

.ci/scripts/test_model.sh

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,24 @@ prepare_artifacts_upload() {
4949
}
5050

5151
build_cmake_executor_runner() {
52+
local backend_string_select="${1:-}"
5253
echo "Building executor_runner"
5354
rm -rf ${CMAKE_OUTPUT_DIR}
54-
cmake -DCMAKE_BUILD_TYPE=Debug \
55-
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
56-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
57-
-B${CMAKE_OUTPUT_DIR} .
58-
59-
cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
55+
mkdir ${CMAKE_OUTPUT_DIR}
56+
if [[ "$backend_string_select" == "XNNPACK" ]]; then
57+
echo "Backend $backend_string_select selected"
58+
(cd ${CMAKE_OUTPUT_DIR} \
59+
&& cmake -DCMAKE_BUILD_TYPE=Release \
60+
-DEXECUTORCH_BUILD_XNNPACK=ON \
61+
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
62+
cmake --build ${CMAKE_OUTPUT_DIR} -j4
63+
else
64+
cmake -DCMAKE_BUILD_TYPE=Debug \
65+
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
66+
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
67+
-B${CMAKE_OUTPUT_DIR} .
68+
cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
69+
fi
6070
}
6171

6272
run_portable_executor_runner() {
@@ -111,19 +121,6 @@ test_model() {
111121
run_portable_executor_runner
112122
}
113123

114-
build_cmake_xnn_executor_runner() {
115-
echo "Building xnn_executor_runner"
116-
117-
(rm -rf ${CMAKE_OUTPUT_DIR} \
118-
&& mkdir ${CMAKE_OUTPUT_DIR} \
119-
&& cd ${CMAKE_OUTPUT_DIR} \
120-
&& retry cmake -DCMAKE_BUILD_TYPE=Release \
121-
-DEXECUTORCH_BUILD_XNNPACK=ON \
122-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
123-
124-
cmake --build ${CMAKE_OUTPUT_DIR} -j4
125-
}
126-
127124
test_model_with_xnnpack() {
128125
WITH_QUANTIZATION=$1
129126
WITH_DELEGATION=$2
@@ -148,12 +145,11 @@ test_model_with_xnnpack() {
148145

149146
# Run test model
150147
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
148+
# TODO eventually buck should also use consolidated executor runners
151149
buck2 run //examples/xnnpack:xnn_executor_runner -- --model_path "${OUTPUT_MODEL_PATH}"
152150
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
153-
if [[ ! -f ${CMAKE_OUTPUT_DIR}/backends/xnnpack/xnn_executor_runner ]]; then
154-
build_cmake_xnn_executor_runner
155-
fi
156-
./${CMAKE_OUTPUT_DIR}/backends/xnnpack/xnn_executor_runner --model_path "${OUTPUT_MODEL_PATH}"
151+
build_cmake_executor_runner "XNNPACK"
152+
./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "${OUTPUT_MODEL_PATH}"
157153
else
158154
echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported atm"
159155
exit 1

backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,13 @@ layout(std430) buffer;
2222

2323
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
2424
${layout_declare_buffer(B, "r", "nchw_in", "int")}
25-
${layout_declare_ubo(B, "ivec4", "sizes")}
25+
26+
$if USE_PUSH_CONST:
27+
layout(push_constant) uniform restrict Block {
28+
ivec4 sizes;
29+
};
30+
$else:
31+
${layout_declare_ubo(B, "ivec4", "sizes")}
2632

2733
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2834

backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ nchw_to_bitw8_image_nobitw8buffer:
88
parameter_names_with_default_values:
99
STORAGE: texture3d
1010
DTYPE: int8
11+
USE_PUSH_CONST: True
1112
generate_variant_forall:
1213
STORAGE:
1314
- VALUE: texture2d
@@ -17,3 +18,5 @@ nchw_to_bitw8_image_nobitw8buffer:
1718
- VALUE: uint8
1819
shader_variants:
1920
- NAME: nchw_to_bitw8_image_nobitw8buffer
21+
- NAME: nchw_to_bitw8_image_nobitw8buffer_no_pc
22+
USE_PUSH_CONST: False

backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,17 @@ layout(std430) buffer;
1212

1313
${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
1414
${layout_declare_tensor(1, "r", "nchw_in", DTYPE, STORAGE)}
15-
${layout_declare_ubo(2, "ivec4", "out_sizes")}
16-
${layout_declare_ubo(3, "ivec4", "out_strides")}
17-
${layout_declare_ubo(4, "int", "numel")}
15+
16+
$if USE_PUSH_CONST:
17+
layout(push_constant) uniform restrict Block {
18+
ivec4 out_sizes;
19+
ivec4 out_strides;
20+
int numel;
21+
};
22+
$else:
23+
${layout_declare_ubo(2, "ivec4", "out_sizes")}
24+
${layout_declare_ubo(3, "ivec4", "out_strides")}
25+
${layout_declare_ubo(4, "int", "numel")}
1826

1927
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
2028

backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ nchw_to_buffer:
88
parameter_names_with_default_values:
99
DTYPE: float
1010
STORAGE: buffer
11+
USE_PUSH_CONST: True
1112
generate_variant_forall:
1213
DTYPE:
1314
- VALUE: half
@@ -17,3 +18,5 @@ nchw_to_buffer:
1718
- VALUE: uint8
1819
shader_variants:
1920
- NAME: nchw_to_buffer
21+
- NAME: nchw_to_buffer_no_pc
22+
USE_PUSH_CONST: False

backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,17 @@ layout(std430) buffer;
2121

2222
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
2323
${layout_declare_buffer(B, "r", "buf_in", DTYPE)}
24-
${layout_declare_ubo(B, "ivec4", "sizes")}
25-
$if not FROM_STAGING:
26-
${layout_declare_ubo(B, "ivec4", "buf_strides")}
24+
25+
$if USE_PUSH_CONST:
26+
layout(push_constant) uniform restrict Block {
27+
ivec4 sizes;
28+
$if not FROM_STAGING:
29+
ivec4 buf_strides;
30+
};
31+
$else:
32+
${layout_declare_ubo(B, "ivec4", "sizes")}
33+
$if not FROM_STAGING:
34+
${layout_declare_ubo(B, "ivec4", "buf_strides")}
2735

2836
#include "indexing_utils.h"
2937

backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ nchw_to_image:
99
STORAGE: texture3d
1010
DTYPE: float
1111
FROM_STAGING: True
12+
USE_PUSH_CONST: True
1213
generate_variant_forall:
1314
DTYPE:
1415
- VALUE: half
@@ -22,3 +23,11 @@ nchw_to_image:
2223
STORAGE: texture2d
2324
- NAME: clone_buffer_to_image
2425
FROM_STAGING: False
26+
- NAME: nchw_to_image_no_pc_texture3d
27+
USE_PUSH_CONST: False
28+
- NAME: nchw_to_image_no_pc_texture2d
29+
STORAGE: texture2d
30+
USE_PUSH_CONST: False
31+
- NAME: clone_buffer_to_image_no_pc
32+
FROM_STAGING: False
33+
USE_PUSH_CONST: False

backends/vulkan/runtime/graph/ops/impl/Clone.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,9 @@ void add_buffer_to_image_node(
105105
// Input and Outputs
106106
{{image, vkapi::kWrite}, {buffer, vkapi::kRead}},
107107
// Parameter Buffers
108-
{graph.sizes_ubo(image), graph.strides_ubo(buffer)},
109-
// Push Constants
110108
{},
109+
// Push Constants
110+
{graph.sizes_pc_of(image), graph.strides_pc_of(buffer)},
111111
// Specialization Constants
112112
{graph.hashed_layout_of(image)},
113113
// Resize Args

backends/vulkan/runtime/graph/ops/impl/Convolution.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,10 @@ ValueRef prepack_biases(
106106
graph.create_local_wg_size(v),
107107
vref,
108108
v,
109-
{t->sizes_ubo()},
109+
{},
110110
// Specialization constants
111-
{t->hashed_layout()}));
111+
{t->hashed_layout()},
112+
{graph.sizes_pc_of(v)}));
112113

113114
return v;
114115
}

backends/vulkan/runtime/graph/ops/impl/Staging.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@ void add_staging_to_tensor_node(
2828
vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(
2929
*graph.get_tensor(out_tensor), graph.int8_buffers_enabled());
3030

31-
vkapi::ParamsBindList ubos;
31+
std::vector<PushConstantDataInfo> pcs;
3232
if (graph.is_buffer_storage(out_tensor)) {
33-
ubos.append(
34-
{graph.sizes_ubo(out_tensor),
35-
graph.strides_ubo(out_tensor),
36-
graph.numel_ubo(out_tensor)});
33+
pcs = {
34+
graph.sizes_pc_of(out_tensor),
35+
graph.strides_pc_of(out_tensor),
36+
graph.numel_pc_of(out_tensor)};
3737
} else {
38-
ubos.append({graph.sizes_ubo(out_tensor)});
38+
pcs = {graph.sizes_pc_of(out_tensor)};
3939
}
4040

4141
graph.execute_nodes().emplace_back(new DispatchNode(
@@ -46,9 +46,9 @@ void add_staging_to_tensor_node(
4646
// Input and Outputs
4747
{{out_tensor, vkapi::kWrite}, {in_staging, vkapi::kRead}},
4848
// Parameter Buffers
49-
ubos,
50-
// Push Constants
5149
{},
50+
// Push Constants
51+
pcs,
5252
// Specialization Constants
5353
{graph.hashed_layout_of(out_tensor)},
5454
// Resize Args
@@ -127,14 +127,14 @@ void add_prepack_standard_node(
127127
vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(
128128
*graph.get_tensor(tensor), graph.int8_buffers_enabled());
129129

130-
vkapi::ParamsBindList ubos;
130+
std::vector<PushConstantDataInfo> pcs;
131131
if (graph.is_buffer_storage(tensor)) {
132-
ubos.append(
133-
{graph.sizes_ubo(tensor),
134-
graph.strides_ubo(tensor),
135-
graph.numel_ubo(tensor)});
132+
pcs = {
133+
graph.sizes_pc_of(tensor),
134+
graph.strides_pc_of(tensor),
135+
graph.numel_pc_of(tensor)};
136136
} else {
137-
ubos.append({graph.sizes_ubo(tensor)});
137+
pcs = {graph.sizes_pc_of(tensor)};
138138
}
139139

140140
int transpose_hw_spec = transpose_hw ? 1 : 0;
@@ -148,9 +148,10 @@ void add_prepack_standard_node(
148148
tensor_data,
149149
tensor,
150150
// Parameter Buffers
151-
ubos,
151+
{},
152152
// Specialization Constants
153-
{graph.hashed_layout_of(tensor), transpose_hw_spec}));
153+
{graph.hashed_layout_of(tensor), transpose_hw_spec},
154+
pcs));
154155
}
155156

156157
ValueRef prepack_standard(

0 commit comments

Comments
 (0)