From 8392ed57f87911bf94c3a15265cc386d7e9f3aba Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 15 Jul 2025 08:34:42 -0700 Subject: [PATCH 1/2] [ET-VK] Adding extract_scalar_or function to extract scalar value or return a default if value at index is none. Pull Request resolved: https://github.com/pytorch/executorch/pull/12357 This diff adds a new function `extract_scalar_or` to the `ComputeGraph` class, which extracts a scalar value from a `ValueRef` index. If the value at the index is `None`, it returns a default value. ghstack-source-id: 296319453 @exported-using-ghexport Differential Revision: [D78094858](https://our.internmc.facebook.com/intern/diff/D78094858/) --- backends/vulkan/runtime/graph/ComputeGraph.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 78135a434e5..5c19a6003e8 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -424,6 +424,12 @@ class ComputeGraph final { // Scalar Value Extraction // + bool is_scalar_or_none(const ValueRef idx) const { + const Value& value = values_.at(idx); + return value.isInt() || value.isDouble() || value.isBool() || + value.isNone(); + } + template T extract_scalar(const ValueRef idx) { Value& value = values_.at(idx); @@ -439,6 +445,15 @@ class ComputeGraph final { VK_THROW("Cannot extract scalar from Value with type ", value.type()); } + template + T extract_scalar_or(const ValueRef idx, const T default_value) { + Value& value = values_.at(idx); + if (value.isNone()) { + return default_value; + } + return extract_scalar(idx); + } + template std::optional extract_optional_scalar(const ValueRef idx) { if (val_is_none(idx)) { From 2d8f1f6558765d2cc37c65c06639fec7057f2a86 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 15 Jul 2025 
08:34:43 -0700 Subject: [PATCH 2/2] [ET-VK] Adding push constant and ubo version of select and slice ops to improve memory and performance. Pull Request resolved: https://github.com/pytorch/executorch/pull/12358 Adding push constant and ubo version of select and slice ops to improve memory and performance. * Updated `transfer_buffer.yaml` and `transfer_texture.yaml` to include `UBO_PARAMS` parameter and generate variants for `select` and `slice` ops with UBO parameters. * Updated `transfer.glsl` to generate ubo and push constant versions of `select` and `slice` ops with UBO parameters. ghstack-source-id: 296319454 Differential Revision: [D78095262](https://our.internmc.facebook.com/intern/diff/D78095262/) --- .../graph/ops/glsl/transfer_buffer.glsl | 19 ++++-- .../graph/ops/glsl/transfer_buffer.yaml | 7 +++ .../graph/ops/glsl/transfer_texture.glsl | 19 ++++-- .../graph/ops/glsl/transfer_texture.yaml | 7 +++ .../runtime/graph/ops/impl/Transfer.cpp | 63 ++++++++++++------- 5 files changed, 84 insertions(+), 31 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl index 7e95b52d8f4..7605c59c72f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl @@ -9,6 +9,7 @@ #version 450 core #define PRECISION ${PRECISION} +#define UBO_PARAMS ${UBO_PARAMS} #define VEC4_T ${texel_type(DTYPE)} #define T ${buffer_scalar_type(DTYPE)} @@ -22,12 +23,13 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")} ${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")} -$if OP_NAME == "slice": - ${layout_declare_ubo(B, "int", "start")} - ${layout_declare_ubo(B, "int", "step")} +$if UBO_PARAMS: + $if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} -$if OP_NAME == "select": - ${layout_declare_ubo(B, "int", "index")} + $if OP_NAME == 
"select": + ${layout_declare_ubo(B, "int", "index")} layout(push_constant) uniform restrict Block { ivec4 in_sizes; @@ -35,6 +37,13 @@ layout(push_constant) uniform restrict Block { ivec4 in_strides; int out_numel; int selected_dim; + $if not UBO_PARAMS: + $if OP_NAME == "slice": + int start; + int step; + + $if OP_NAME == "select": + int index; }; ${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml index bdde613c8ce..47440cd0a13 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml @@ -2,6 +2,7 @@ transfer_buffer: parameter_names_with_default_values: DTYPE: float OP_NAME: select + UBO_PARAMS: False generate_variant_forall: DTYPE: - VALUE: half @@ -11,3 +12,9 @@ transfer_buffer: OP_NAME: select - NAME: slice_buffer OP_NAME: slice + - NAME: select_ubo_buffer + OP_NAME: select + UBO_PARAMS: True + - NAME: slice_ubo_buffer + OP_NAME: slice + UBO_PARAMS: True diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl index d3e25436c04..0f34713cb43 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl @@ -9,6 +9,7 @@ #version 450 core #define PRECISION ${PRECISION} +#define UBO_PARAMS ${UBO_PARAMS} #define VEC4_T ${texel_type(DTYPE)} #define T ${buffer_scalar_type(DTYPE)} @@ -23,17 +24,25 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")} ${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")} -$if OP_NAME == "slice": - ${layout_declare_ubo(B, "int", "start")} - ${layout_declare_ubo(B, "int", "step")} +$if UBO_PARAMS: + $if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} -$if 
OP_NAME == "select": - ${layout_declare_ubo(B, "int", "index")} + $if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} layout(push_constant) uniform restrict Block { ivec4 out_sizes; ivec4 in_sizes; int selected_dim; + $if not UBO_PARAMS: + $if OP_NAME == "slice": + int start; + int step; + + $if OP_NAME == "select": + int index; }; ${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml index f877ee036e4..7484697f097 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml @@ -2,6 +2,7 @@ transfer_texture: parameter_names_with_default_values: DTYPE: float OP_NAME: select + UBO_PARAMS: False generate_variant_forall: DTYPE: - VALUE: half @@ -11,3 +12,9 @@ transfer_texture: OP_NAME: select - NAME: slice_texture3d OP_NAME: slice + - NAME: select_ubo_texture3d + OP_NAME: select + UBO_PARAMS: True + - NAME: slice_ubo_texture3d + OP_NAME: slice + UBO_PARAMS: True diff --git a/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp index 7b5fad57483..60127ecf9bd 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp @@ -40,34 +40,52 @@ void add_transfer_copy_node( int64_t dim_whcn = nchw_dim_to_whcn_dim(dim, ndim); + struct TransferParams { + int32_t dim; + int32_t index_or_start_ref; + int32_t step_ref; + } transfer_params{static_cast(dim_whcn), 0, 0}; + + const bool param_is_scalar = graph.is_scalar_or_none(index_or_start_ref) && + (transfer_type == TransferType::SELECT || + graph.is_scalar_or_none(step_ref)); + vkapi::ParamsBindList param_buffers; - if (transfer_type == TransferType::SELECT) { - param_buffers = { - graph.get_or_create_int_param_buffer(index_or_start_ref, 0)}; - } else { // 
TransferType::SLICE - param_buffers = { - graph.get_or_create_int_param_buffer(index_or_start_ref, 0), - graph.get_or_create_int_param_buffer(step_ref, 1)}; + if (!param_is_scalar) { + if (transfer_type == TransferType::SELECT) { + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0)}; + } else { // TransferType::SLICE + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0), + graph.get_or_create_int_param_buffer(step_ref, 1)}; + } + } else { + transfer_params.index_or_start_ref = + graph.extract_scalar_or(index_or_start_ref, 0); + if (transfer_type != TransferType::SELECT) { + transfer_params.step_ref = graph.extract_scalar_or(step_ref, 1); + } } - const struct TransferParams { - const int32_t dim; - } transfer_params{static_cast(dim_whcn)}; - std::vector push_constants; + push_constants.reserve(graph.is_buffer_storage(out) ? 5 : 3); if (graph.is_buffer_storage(out)) { - push_constants = { - graph.sizes_pc_of(in), - graph.strides_pc_of(out), - graph.strides_pc_of(in), - graph.numel_pc_of(out), - PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + push_constants.emplace_back(graph.sizes_pc_of(in)); + push_constants.emplace_back(graph.strides_pc_of(out)); + push_constants.emplace_back(graph.strides_pc_of(in)); + push_constants.emplace_back(graph.numel_pc_of(out)); } else { - push_constants = { - graph.sizes_pc_of(out), - graph.sizes_pc_of(in), - PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + push_constants.emplace_back(graph.sizes_pc_of(out)); + push_constants.emplace_back(graph.sizes_pc_of(in)); + } + + if (param_is_scalar) { + push_constants.emplace_back(&transfer_params, sizeof(transfer_params)); + } else { + push_constants.emplace_back( + &transfer_params.dim, sizeof(transfer_params.dim)); } vkapi::SpecVarList spec_vars = { @@ -82,6 +100,9 @@ void add_transfer_copy_node( } else { // TransferType::SLICE kernel_name = "slice"; } + if (!param_is_scalar) { + kernel_name += 
"_ubo"; + } add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); add_dtype_suffix(kernel_name, graph.dtype_of(out));