diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index b32f4eb4308..02df85c33e8 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -499,6 +499,8 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { compute_graph->encode_prepack(); compute_graph->prepack(); + // TODO(ssjia): remove this once we can batch compile compute pipelines + // during prepare(). compute_graph->encode_execute(); return Error::Ok; @@ -567,9 +569,14 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { } } + // propagate_resize() will re-encode the command buffer so that push + // constants are updated and DynamicDispatchNode can update the compute + // shader, global workgroup size, and local workgroup size to perform the + // model inference. if (should_propagate_resize) { compute_graph->propagate_resize(); } + compute_graph->execute(); for (size_t i = 0; i < compute_graph->outputs().size(); i++) { diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index a4a6abdd63f..1222a9fc641 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -492,7 +492,7 @@ vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer( const ValueRef idx) { if (values_.at(idx).isInt()) { const int32_t val = extract_scalar(idx); - create_params_buffer(val); + return create_params_buffer(val); } else if (values_.at(idx).isSymInt()) { SymIntPtr symint = get_symint(idx); return vkapi::BufferBindInfo(symint->gpu_buffer.buffer()); @@ -500,6 +500,16 @@ vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer( VK_THROW("Cannot create a int param buffer for the given value"); } +vkapi::BufferBindInfo ComputeGraph::get_or_create_int_param_buffer( + const ValueRef idx, + const int32_t default_val) { + if (values_.at(idx).isNone()) { + return create_params_buffer(default_val); + } else { + return get_or_create_int_param_buffer(idx); + } +} + void ComputeGraph::set_symint(const ValueRef idx, const int32_t val) { get_symint(idx)->set(val); } @@ -678,11 +688,12 @@ void ComputeGraph::encode_execute() { } } -void ComputeGraph::execute() const { +void ComputeGraph::execute() { vkapi::VulkanFence fence = context_->fences().get_fence(); context_->submit_cmd_to_gpu(fence.get_submit_handle()); fence.wait(); context_->fences().return_fence(fence); + execute_count_++; } void ComputeGraph::resize_input( @@ -692,10 +703,17 @@ void ComputeGraph::resize_input( get_tensor(io_val.value)->virtual_resize(new_sizes); } +void ComputeGraph::virtual_resize( + const ValueRef idx, + const std::vector& new_sizes) { + get_tensor(idx)->virtual_resize(new_sizes); +} + void ComputeGraph::propagate_resize() { for (std::unique_ptr& node : execute_nodes_) { node->trigger_resize(this); } + encode_execute(); } } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 9f4bab3ac04..fe546f26477 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -187,6 +187,7 @@ class ComputeGraph final { protected: size_t values_in_use_ = 0; + size_t execute_count_ = 0; public: // @@ -397,6 +398,19 @@ class ComputeGraph final { std::optional extract_optional_scalar(const ValueRef idx) { if (val_is_none(idx)) { return ::std::nullopt; + } else if (val_is_symint(idx)) { + return utils::safe_downcast(read_symint(idx)); + } else { + return extract_scalar(idx); + } + } + + template + T extract_optional_scalar(const ValueRef idx, const T default_val) { + if (val_is_none(idx)) { + return default_val; + } else if (val_is_symint(idx)) { + return utils::safe_downcast(read_symint(idx)); } else { return extract_scalar(idx); } @@ -608,6 +622,10 @@ class ComputeGraph final { */ vkapi::BufferBindInfo get_or_create_int_param_buffer(const ValueRef idx); + vkapi::BufferBindInfo get_or_create_int_param_buffer( + const ValueRef idx, + const int32_t default_value); + void set_symint(const ValueRef idx, const int32_t val); int32_t read_symint(const ValueRef idx); @@ -745,13 +763,16 @@ class ComputeGraph final { // void encode_execute(); - void execute() const; + void execute(); // // Dynamic Shape support // void resize_input(const int64_t idx, const std::vector& new_sizes); + void virtual_resize( + const ValueRef idx, + const std::vector& new_sizes); void propagate_resize(); // @@ -762,6 +783,10 @@ class ComputeGraph final { return context_->adapter_ptr()->supports_int16_shader_types(); } + inline size_t execute_count() const { + return execute_count_; + } + /* * Check whether the GPU supports 8 bit buffers. */ diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp index 51ff0c122b0..a0d3a4c2e5c 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp @@ -46,15 +46,7 @@ void DispatchNode::encode(ComputeGraph* graph) { std::unique_lock cmd_lock = context->dispatch_lock(); - std::array push_constants_data; - uint32_t push_constants_offset = 0; - - for (const auto& push_constant : push_constants_) { - push_constants_offset += push_constant.write( - push_constants_data.data(), - push_constants_offset, - kMaxPushConstantSize); - } + write_push_constant_data(); context->report_shader_dispatch_start( shader_.kernel_name, @@ -63,7 +55,7 @@ void DispatchNode::encode(ComputeGraph* graph) { node_id_); vkapi::DescriptorSet descriptor_set = context->get_descriptor_set( - shader_, local_workgroup_size_, spec_vars_, push_constants_offset); + shader_, local_workgroup_size_, spec_vars_, push_constants_offset_); uint32_t idx = 0; idx = bind_values_to_descriptor_set( @@ -76,10 +68,20 @@ void DispatchNode::encode(ComputeGraph* graph) { pipeline_barrier, shader_, global_workgroup_size_, - push_constants_data.data(), - push_constants_offset); + push_constants_data_.data(), + push_constants_offset_); context->report_shader_dispatch_end(); } +void DispatchNode::write_push_constant_data() { + push_constants_offset_ = 0; + for (const auto& push_constant : push_constants_) { + push_constants_offset_ += push_constant.write( + push_constants_data_.data(), + push_constants_offset_, + kMaxPushConstantSize); + } +} + } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h index c45f0a741fd..db95adfee39 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h @@ -50,6 +50,12 @@ class DispatchNode : public ExecuteNode { const vkapi::SpecVarList spec_vars_; const std::vector push_constants_; + // For push constants + std::array push_constants_data_{}; + uint32_t push_constants_offset_ = 0; + + void write_push_constant_data(); + public: operator bool() const { return shader_; diff --git a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp index ac84916c6fa..a8d2fe2e99d 100644 --- a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp +++ b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp @@ -25,9 +25,9 @@ DynamicDispatchNode::DynamicDispatchNode( const ResizeFunction& resize_fn) : DispatchNode( graph, - pick_shader_fn(&graph, args, resize_args), - pick_global_wg_fn(&graph, args, resize_args), - pick_local_wg_fn(&graph, args, resize_args), + vkapi::ShaderInfo(), + {1u, 1u, 1u}, + {1u, 1u, 1u}, args, params, push_constants, @@ -36,13 +36,57 @@ DynamicDispatchNode::DynamicDispatchNode( resize_fn), pick_shader_fn_(pick_shader_fn), pick_global_wg_fn_(pick_global_wg_fn), + pick_local_wg_fn_(pick_local_wg_fn) { + shader_ = pick_shader_fn(&graph, args, resize_args); + global_workgroup_size_ = + pick_global_wg_fn(&graph, shader_, args, resize_args); + local_workgroup_size_ = utils::WorkgroupSize(pick_local_wg_fn( + &graph, shader_, global_workgroup_size_, args, resize_args)); +} + +DynamicDispatchNode::DynamicDispatchNode( + ComputeGraph& graph, + const vkapi::ShaderInfo& shader, + const PickGlobalFn& pick_global_wg_fn, + const PickLocalFn& pick_local_wg_fn, + const std::vector& args, + const vkapi::ParamsBindList& params, + const std::vector& push_constants, + const vkapi::SpecVarList& spec_vars, + const std::vector& resize_args, + const ResizeFunction& resize_fn) + : DispatchNode( + graph, + shader, + pick_global_wg_fn(&graph, shader, args, resize_args), + pick_local_wg_fn( + &graph, + shader, + pick_global_wg_fn(&graph, shader, args, resize_args), + args, + resize_args), + args, + params, + push_constants, + spec_vars, + resize_args, + resize_fn), + pick_shader_fn_{nullptr}, + pick_global_wg_fn_(pick_global_wg_fn), pick_local_wg_fn_(pick_local_wg_fn) {} void DynamicDispatchNode::encode(ComputeGraph* graph) { - shader_ = pick_shader_fn_(graph, args_, resize_args_); - global_workgroup_size_ = pick_global_wg_fn_(graph, args_, resize_args_); - local_workgroup_size_ = - utils::WorkgroupSize(pick_local_wg_fn_(graph, args_, resize_args_)); + if (pick_shader_fn_) { + shader_ = pick_shader_fn_(graph, args_, resize_args_); + } + if (pick_global_wg_fn_) { + global_workgroup_size_ = + pick_global_wg_fn_(graph, shader_, args_, resize_args_); + } + if (pick_local_wg_fn_) { + local_workgroup_size_ = utils::WorkgroupSize(pick_local_wg_fn_( + graph, shader_, global_workgroup_size_, args_, resize_args_)); + } DispatchNode::encode(graph); } diff --git a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h index ede50941415..005151272c3 100644 --- a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h @@ -32,10 +32,13 @@ class DynamicDispatchNode final : public DispatchNode { const std::vector&)>; using PickGlobalFn = const std::function&, const std::vector&)>; using PickLocalFn = const std::function&, const std::vector&)>; @@ -51,6 +54,18 @@ class DynamicDispatchNode final : public DispatchNode { const std::vector& resize_args, const ResizeFunction& resize_fn = nullptr); + explicit DynamicDispatchNode( + ComputeGraph& graph, + const vkapi::ShaderInfo& shader, + const PickGlobalFn& pick_global_wg_fn, + const PickLocalFn& pick_local_wg_fn, + const std::vector& args, + const vkapi::ParamsBindList& params, + const std::vector& push_constants, + const vkapi::SpecVarList& spec_vars, + const std::vector& resize_args, + const ResizeFunction& resize_fn = nullptr); + ~DynamicDispatchNode() override = default; void encode(ComputeGraph* graph) override; diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h index 7563fc63c71..0731722e13a 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h @@ -65,7 +65,7 @@ class ExecuteNode { (void)graph; } - inline void trigger_resize(ComputeGraph* graph) { + virtual inline void trigger_resize(ComputeGraph* graph) { if (resize_fn_ != nullptr) { resize_fn_(graph, args_, resize_args_); } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select.glslh b/backends/vulkan/runtime/graph/ops/glsl/select.glslh new file mode 100644 index 00000000000..3bcbf04a3ba --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/select.glslh @@ -0,0 +1,74 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#ifndef SELECT_GLSLH +#define SELECT_GLSLH + +/* + * Enable the fast path if a texel loaded from the input texture can be used as + * is to store to the output texture. The following conditions must be met: + * + * 1. The input and output textures have the same packed dimension. + * 2. The selected_dim must not be the packed dimension of the input. + * 3. The packed dimension of the input must "map" to the packed dimension of + * the output. This occurs if selected_dim is greater than the packed dimension + * of the input. + */ +bool can_use_fast_path() { + if (out_packed_dim != in_packed_dim) { + return false; + } + if (selected_dim <= in_packed_dim) { + return false; + } + return true; +} + +/* + * Given an output tensor index, return the corresponding input tensor index for + * the select operator. This is done by "inserting" the select index at the + * selected_dim in the input tensor index. + * + * A simple example is (note all tensor index are in WHCN order): + * out_tidx = [7, 5, 9] + * selected_dim = 2 + * index = 3 + * in_tidx = [7, 3, 5, 9] + * + * This function assumes that the following variables are defined in the layout: + * - in_sizes + * - selected_dim + * - index + */ +ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) { + ivec4 in_tidx = ivec4(0); + + int adjusted_index = index; + if (index < 0) { + adjusted_index = index + in_sizes[selected_dim]; + } + + // Handle different dimensions for selection + if (selected_dim == 0) { + // Select from width dimension + in_tidx = ivec4(adjusted_index, out_tidx.x, out_tidx.y, out_tidx.z); + } else if (selected_dim == 1) { + // Select from height dimension + in_tidx = ivec4(out_tidx.x, adjusted_index, out_tidx.y, out_tidx.z); + } else if (selected_dim == 2) { + // Select from channel dimension + in_tidx = ivec4(out_tidx.x, out_tidx.y, adjusted_index, out_tidx.z); + } else if (selected_dim == 3) { + // Select from batch dimension + in_tidx = ivec4(out_tidx.x, out_tidx.y, out_tidx.z, adjusted_index); + } + + return in_tidx; +} + +#endif // SELECT_GLSLH diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl deleted file mode 100644 index f94e1120492..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { - // data.x: index along batch dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const uint src_pos_z = (num_texel_per_batch * index) + pos.z; - imageStore( - image_out, pos, texelFetch(image_in, ivec3(pos.x, pos.y, src_pos_z), 0)); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl deleted file mode 100644 index 0bbec798484..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} -#define T ${texel_component_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - int index; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int tex = index / 4; - const int ind = index % 4; - const T v = VEC4_T(texelFetch(image_in, ivec3(pos.x, pos.y, tex), 0))[ind]; - - imageStore(image_out, ivec3(pos.x, pos.y, 0), VEC4_T(v, 0, 0, 0)); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.yaml deleted file mode 100644 index 1c5c4e34b06..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_channel_3d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_channel_3d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl deleted file mode 100644 index 517362f76ea..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { - // data.x: index along channel dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - // read in the same channel from 4 separate batches - VEC4_T out_texel = VEC4_T(0, 0, 0, 0); - for (int k = 0; k < 4; k++) { - if ((k + pos.z * 4) >= - num_batches) { - break; - } - const uint src_pos_z = (4 * num_texel_per_batch * pos.z) + - (k * num_texel_per_batch) + (index / 4); - const uint src_pos_t = index % 4; - out_texel[k] = - VEC4_T(texelFetch(image_in, ivec3(pos.x, pos.y, src_pos_z), 0))[src_pos_t]; - } - - imageStore(image_out, pos, out_texel); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl deleted file mode 100644 index 87409fb35fd..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - int index; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - // w - const int src_x = pos.x; - // h - const int src_y = index; - // c - const int src_z = pos.y; - - const VEC4_T v = VEC4_T(texelFetch(image_in, ivec3(src_x, src_y, src_z), 0)); - - for (int i = 0; i < 4; i++) { - ivec3 new_pos = ivec3(pos.x, pos.y * 4 + i, 0); - - // When the C-channel exceeds original block size, exit early - if (new_pos.y >= sizes.y) { - return; - } - - imageStore(image_out, new_pos, VEC4_T(v[i], 0, 0, 0)); - } -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml deleted file mode 100644 index a373f1decd9..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_height_3d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_height_3d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl deleted file mode 100644 index 2e4e2afb2db..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - // data.x: index along height dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - VEC4_T out_texel = VEC4_T(0, 0, 0, 0); - // read in the same channel from 4 separate batches - for (int k = 0; k < 4; k++) { - if ((k + pos.z * 4) >= num_batches - ) { // < 4 batches for this texel, exit early - break; - } - const uint src_pos_z = (pos.z * num_texel_per_batch * 4) + - k * num_texel_per_batch + (pos.y / 4); - out_texel[k] = VEC4_T(texelFetch( - image_in, ivec3(pos.x, index, src_pos_z), 0))[pos.y % 4]; - } - imageStore(image_out, pos, out_texel); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml deleted file mode 100644 index c3724f1157a..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_height_4d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_height_4d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl deleted file mode 100644 index 1e12d15ab21..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { - int index; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - // w - const int src_x = index; - // h - const int src_y = pos.x; - // c - const int src_z = pos.y; - - const VEC4_T v = VEC4_T(texelFetch(image_in, ivec3(src_x, src_y, src_z), 0)); - - for (int i = 0; i < 4; i++) { - ivec3 new_pos = ivec3(pos.x, pos.y * 4 + i, 0); - - // When the C-channel exceeds original block size, exit early - if (new_pos.y >= sizes.y) { - return; - } - - imageStore(image_out, new_pos, VEC4_T(v[i], 0, 0, 0)); - } -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml deleted file mode 100644 index a3070bf6ca3..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_width_3d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_width_3d diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl deleted file mode 100644 index ffbd8afbda0..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { - ivec3 out_limits; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -// index to select -layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { - // data.x: index along width dim to select - // data.y: number of batches - // data.z: number of texels per batch - // data.w: unused - ivec4 select_info; -}; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (any(greaterThanEqual(pos, out_limits))) { - return; - } - - const int num_batches = select_info.y; - const int num_texel_per_batch = select_info.z; - const int index = select_info.x; - - //vec4 out_texel = vec4(0, 0, 0, 0); - VEC4_T out_texel = VEC4_T(0, 0, 0, 0); - // read in the same channel from 4 separate batches - for (int k = 0; k < 4; k++) { - if ((k + pos.z * 4) >= - num_batches) { // < 4 batches for this texel, exit early - break; - } - const uint src_pos_z = (pos.z * num_texel_per_batch * 4) + - k * num_texel_per_batch + (pos.y / 4); - - out_texel[k] = VEC4_T(texelFetch( - image_in, ivec3(index, pos.x, src_pos_z), 0))[pos.y % 4]; - } - imageStore(image_out, pos, out_texel); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml deleted file mode 100644 index f1131d77395..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.yaml +++ /dev/null @@ -1,10 +0,0 @@ -select_width_4d: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: select_width_4d diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice.glslh b/backends/vulkan/runtime/graph/ops/glsl/slice.glslh new file mode 100644 index 00000000000..5d4cc70fdc1 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/slice.glslh @@ -0,0 +1,53 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#ifndef SLICE_GLSLH +#define SLICE_GLSLH + +/** + * Enable the fast path if a texel loaded from the input texture can be used as + * is to store to the output texture. The following conditions must be met: + * + * 1. The input and output textures have the same packed dimension. + * 2. The select_dim must not be the packed dimension of the input. + */ +bool can_use_fast_path() { + if (out_packed_dim != in_packed_dim) { + return false; + } + if (in_packed_dim == selected_dim) { + return false; + } + return true; +} + +/* + * Converts output tensor indices to input tensor indices for the slice operation. + * This function maps the output indices to the corresponding input indices based on + * the slice parameters (start, step, selected_dim). + * + * Parameters assumed to be defined in the layout specifier: + * - in_sizes + * - selected_dim + * - start + * - step + */ +ivec4 out_tidx_to_in_tidx(const ivec4 out_tidx) { + ivec4 in_tidx = out_tidx; + + int adjusted_start = start; + if (start < 0) { + adjusted_start = start + in_sizes[selected_dim]; + } + + in_tidx[selected_dim] = adjusted_start + out_tidx[selected_dim] * step; + + return in_tidx; +} + +#endif // SLICE_GLSLH diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl deleted file mode 100644 index 0a6fa31a65f..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.glsl +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)} -${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)} -${layout_declare_ubo(2, "ivec4", "out_sizes")} -${layout_declare_ubo(3, "ivec4", "in_sizes")} - -layout(set = 0, binding = 4) uniform PRECISION restrict SliceArg { - int offset; - int step; -} -slice_arg; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -layout(constant_id = 3) const int packed_dim = C_DIM; - -void main() { - const ivec3 out_pos = ivec3(gl_GlobalInvocationID); - const ivec4 idx = to_tensor_idx(out_pos, out_sizes, packed_dim); - - if (any(greaterThanEqual(idx, out_sizes))) { - return; - } - - // We map the output pos using the buffer index. For each index in the texel, - // we calculate the source whcn-coordinate amended with offset-ed channel - // value. Then we calculate the actual texture position from the - // whcn-coordinate. - const ivec4 buf_indices = tidx_to_nchwi(idx, out_sizes, packed_dim); - - vec4 outex; - for (int i=0;i<4;i++) { - ivec4 user_coor = nchwi_to_tidx(buf_indices[i], out_sizes); - - int in_dim = user_coor[packed_dim]; - - ivec4 in_user_coor = user_coor; - in_user_coor[packed_dim] = slice_arg.offset + in_dim * slice_arg.step; - - ivec4 in_pow_elem = to_texture_elem_pos( - in_user_coor, - in_sizes, - packed_dim); - - vec4 v = texelFetch(t_in, in_pow_elem.xyz, 0); - - outex[i] = v[in_pow_elem.w]; - } - imageStore(t_out, out_pos, outex); -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml b/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml deleted file mode 100644 index 718e7316824..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_packed_dim.yaml +++ /dev/null @@ -1,11 +0,0 @@ -slice_packed_dim: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - STORAGE: texture3d - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: slice_packed_dim diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl deleted file mode 100644 index 54f0bd0b78c..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.glsl +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -#version 450 core - -#define PRECISION ${PRECISION} - -#define VEC4_T ${texel_type(DTYPE)} - -layout(std430) buffer; - -#include "indexing_utils.h" - -layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; -layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; - -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { - ivec4 sizes; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict SliceArg { - int dim; - int offset; - int step; - int image_in_channel_size; -} -slice_arg; - -layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; - -layout(constant_id = 3) const int packed_dim = C_DIM; - -void main() { - const ivec3 pos = ivec3(gl_GlobalInvocationID); - - if (pos_out_of_bounds(pos, sizes, packed_dim)) { - return; - } - - ivec3 in_pos = pos; - - // slice along batch axis - if (slice_arg.dim == 3) { - // index of the channel inside a batch - const int chanl_index = pos.z % slice_arg.image_in_channel_size; - // index of batch - const int batch_index = pos.z / slice_arg.image_in_channel_size; - in_pos.z = (slice_arg.offset + batch_index * slice_arg.step) * slice_arg.image_in_channel_size + chanl_index; - } else if (slice_arg.dim == C_DIM) { - // index of the channel inside a batch - const int chanl_index = pos.z % sizes.z; - // index of batch - const int batch_index = pos.z / sizes.z; - in_pos.z = slice_arg.offset + batch_index * slice_arg.image_in_channel_size + chanl_index * slice_arg.step; - } else if (slice_arg.dim == H_DIM) { - in_pos.y = slice_arg.offset + pos.y * slice_arg.step; - } else { - in_pos.x = slice_arg.offset + pos.x * slice_arg.step; - } - - imageStore(image_out, pos, texelFetch(image_in, in_pos, 0)); - -} diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml b/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml deleted file mode 100644 index 0453bb707b1..00000000000 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_unpacked_dim.yaml +++ /dev/null @@ -1,10 +0,0 @@ -slice_unpacked_dim: - parameter_names_with_default_values: - DTYPE: float - NDIM: 3 - generate_variant_forall: - DTYPE: - - VALUE: half - - VALUE: float - shader_variants: - - NAME: slice_unpacked_dim diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl new file mode 100644 index 00000000000..3ca854e0526 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl @@ -0,0 +1,58 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#version 450 core + +#define PRECISION ${PRECISION} + +#define VEC4_T ${texel_type(DTYPE)} +#define T ${buffer_scalar_type(DTYPE)} + +${define_active_storage_type("buffer")} +${define_required_extensions(DTYPE)} + +layout(std430) buffer; + +#include "indexing_utils.h" +${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")} +${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")} + +$if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} + +$if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} + +layout(push_constant) uniform restrict Block { + ivec4 in_sizes; + ivec4 out_strides; + ivec4 in_strides; + int out_numel; + int selected_dim; +}; + +${layout_declare_spec_const(C, "int", "out_packed_dim", "DEFAULT_LAYOUT")} +${layout_declare_spec_const(C, "int", "in_packed_dim", "DEFAULT_LAYOUT")} + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +#include "${OP_NAME}.glslh" + +void main() { + const int out_bufi = ivec3(gl_GlobalInvocationID).x; + if (out_bufi >= out_numel) { + return; + } + + const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_packed_dim); + ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx); + + const int in_bufi = tidx_to_bufi(in_tidx, in_strides); + t_out[out_bufi] = t_in[in_bufi]; +} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml similarity index 54% rename from backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.yaml rename to backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml index 6236555f5dd..bdde613c8ce 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml @@ -1,10 +1,13 @@ -select_channel_4d: +transfer_buffer: parameter_names_with_default_values: DTYPE: float - NDIM: 3 + OP_NAME: select generate_variant_forall: DTYPE: - VALUE: half - VALUE: float shader_variants: - - NAME: select_channel_4d + - NAME: select_buffer + OP_NAME: select + - NAME: slice_buffer + OP_NAME: slice diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl new file mode 100644 index 00000000000..d3e25436c04 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl @@ -0,0 +1,83 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#version 450 core + +#define PRECISION ${PRECISION} + +#define VEC4_T ${texel_type(DTYPE)} +#define T ${buffer_scalar_type(DTYPE)} + +${define_active_storage_type("texture3d")} +${define_required_extensions(DTYPE)} + +layout(std430) buffer; + +#include "indexing_utils.h" + +${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")} +${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")} + +$if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} + +$if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} + +layout(push_constant) uniform restrict Block { + ivec4 out_sizes; + ivec4 in_sizes; + int selected_dim; +}; + +${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); +const lowp int out_packed_dim = unhash_packed_dim(out_layout); + +${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} +const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); +const lowp int in_packed_dim = unhash_packed_dim(in_layout); + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +#include "${OP_NAME}.glslh" + +void main() { + const ivec3 lpos = ivec3(gl_GlobalInvocationID); + ivec4 out_tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, out_packed_dim); + + if (any(greaterThanEqual(out_tidx, out_sizes))) { + return; + } + + if (can_use_fast_path()) { + ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx); + ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim); + VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos)); + + write_texel_lpos(t_out, lpos, in_texel, out_axis_map); + } + else { + VEC4_T out_texel = VEC4_T(0); + for (int texel_i = 0; texel_i < 4; ++texel_i) { + ivec4 in_tidx = out_tidx_to_in_tidx(out_tidx); + ivec3 in_pos = tidx_to_pos(in_tidx, in_sizes, in_axis_map, in_packed_dim); + int element_idx = in_tidx[in_packed_dim] % 4; + + VEC4_T in_texel = VEC4_T(load_texel(t_in, in_pos)); + T selected_value = T(in_texel[element_idx]); + + out_texel[texel_i] = selected_value; + + out_tidx[out_packed_dim]++; + } + + write_texel_lpos(t_out, lpos, out_texel, out_axis_map); + } +} diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml similarity index 52% rename from backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.yaml rename to backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml index 9c7d54c8f69..f877ee036e4 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml @@ -1,10 +1,13 @@ -select_batch_4d: +transfer_texture: parameter_names_with_default_values: DTYPE: float - NDIM: 3 + OP_NAME: select generate_variant_forall: DTYPE: - VALUE: half - VALUE: float shader_variants: - - NAME: select_batch_4d + - NAME: select_texture3d + OP_NAME: select + - NAME: slice_texture3d + OP_NAME: slice diff --git a/backends/vulkan/runtime/graph/ops/impl/Common.cpp b/backends/vulkan/runtime/graph/ops/impl/Common.cpp new file mode 100644 index 00000000000..4de099231d3 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Common.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace vkcompute { + +utils::uvec3 default_pick_global_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const std::vector& args, + const std::vector& additional_args) { + (void)shader; + const ValueRef out = args.at(0).refs.at(0); + return graph->create_global_wg_size(out); +} + +utils::uvec3 default_pick_local_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const utils::uvec3& global_workgroup_size, + const std::vector& args, + const std::vector& additional_args) { + (void)shader; + return graph->create_local_wg_size(global_workgroup_size); +} + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Common.h b/backends/vulkan/runtime/graph/ops/impl/Common.h new file mode 100644 index 00000000000..d5ff455ae41 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Common.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace vkcompute { + +/** + * Creates a global workgroup size based on the first output tensor in the args. + * This is a utility function that extracts the output tensor from + * args.at(0).refs.at(0) and calls graph->create_global_wg_size(out) on it. + * + * @param graph The ComputeGraph instance + * @param args Vector of ArgGroup containing the output tensor reference + * @return utils::uvec3 The global workgroup size + */ +utils::uvec3 default_pick_global_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const std::vector& args, + const std::vector& additional_args); + +/** + * Creates a local workgroup size based on the first output tensor in the args. + * This is a utility function that extracts the output tensor from + * args.at(0).refs.at(0) and calls graph->create_local_wg_size(out) on it. + * + * @param graph The ComputeGraph instance + * @param args Vector of ArgGroup containing the output tensor reference + * @return utils::uvec3 The local workgroup size + */ +utils::uvec3 default_pick_local_wg_size( + ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const utils::uvec3& global_workgroup_size, + const std::vector& args, + const std::vector& additional_args); + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Select.cpp b/backends/vulkan/runtime/graph/ops/impl/Select.cpp index a83e986e414..69d49e8283b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Select.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Select.cpp @@ -8,129 +8,136 @@ #include -#include -#include - -#include +#include #include #include namespace vkcompute { -void check_args( - const api::vTensor& t_in, - int64_t dim, - int64_t index, - const api::vTensor& t_out) { - VK_CHECK_COND(check_packed_dim_is(t_in, WHCN::kChannelsDim)); - VK_CHECK_COND(check_packed_dim_is(t_out, WHCN::kChannelsDim)); +void resize_select_node( + ComputeGraph* graph, + const std::vector& args, + const std::vector& extra_args) { + ValueRef out = args.at(0).refs.at(0); + ValueRef in = args.at(1).refs.at(0); + int64_t dim = graph->extract_scalar(extra_args.at(0)); - const int64_t in_dim = t_in.dim(); - VK_CHECK_COND( - in_dim == 3 || in_dim == 4, - "Vulkan select only support 3d or 4d tensors!"); - - const int64_t in_size = t_in.size(dim); - - if (index < -in_size || index >= in_size) { - VK_CHECK_COND( - false, - "select(): index ", - index, - " t_outof range for tensor of size ", - in_size, - " at dimension ", - dim); + int64_t in_ndim = graph->dim_of(in); + + if (dim < 0) { + dim += in_ndim; + } + + std::vector new_out_sizes; + for (int64_t i = 0; i < in_ndim; ++i) { + if (i != dim) { + new_out_sizes.push_back(graph->size_at(i, in)); + } } + + graph->virtual_resize(out, new_out_sizes); } -void add_select_int_node( +void check_select_args( ComputeGraph& graph, const ValueRef in, const ValueRef dim_ref, const ValueRef index_ref, const ValueRef out) { - vTensorPtr t_in = graph.get_tensor(in); - vTensorPtr t_out = graph.get_tensor(out); int64_t dim = graph.extract_scalar(dim_ref); - int64_t index = graph.extract_scalar(index_ref); + int64_t index = graph.extract_optional_scalar(index_ref, 0); + int64_t in_ndim = graph.dim_of(in); - check_args(*t_in, dim, index, *t_out); + if (dim < 0) { + dim += in_ndim; + } - const int64_t in_size = t_in->size(dim); + VK_CHECK_COND( + dim >= 0 && dim < in_ndim, + "Dimension out of range (expected to be in range of [", + -in_ndim, + ", ", + in_ndim - 1, + "], but got ", + dim, + ")"); + + const int64_t in_size_at_dim = graph.size_at(dim, in); if (index < 0) { - index += in_size; + index += in_size_at_dim; } - std::string kernel_name; - - // for 3d tensors, these values are not used by the shader. - int32_t num_texel_per_batch = 1; - int32_t num_batches = 1; - - int64_t in_dim = t_in->dim(); - if (in_dim == 3) { - if (dim == 0) { - kernel_name = "select_channel_3d"; - } else if (dim == 1) { - kernel_name = "select_height_3d"; - } else if (dim == 2) { - kernel_name = "select_width_3d"; - } else { - VK_CHECK_COND( - false, "Unexpected dim value=", dim, "for the input 3d tensor"); - } - } else { // self.dim() == 4 - num_texel_per_batch = - static_cast(std::ceil(static_cast(t_in->size(1)) / 4)); - num_batches = t_in->size(0); - if (dim == 0) { - kernel_name = "select_batch_4d"; - } else if (dim == 1) { - kernel_name = "select_channel_4d"; - } else if (dim == 2) { - kernel_name = "select_height_4d"; - } else if (dim == 3) { - kernel_name = "select_width_4d"; - } else { + VK_CHECK_COND( + index >= 0 && index < in_size_at_dim, + "select(): index ", + index, + " out of range for tensor of size ", + in_size_at_dim, + " at dimension ", + dim); + + // Check that output tensor has correct dimensions + int64_t out_dim = graph.dim_of(out); + VK_CHECK_COND( + out_dim == in_ndim - 1, + "Output tensor dimension mismatch (expected ", + in_size_at_dim - 1, + ", but got ", + out_dim, + ")"); + + // Check that output tensor has correct sizes + int64_t out_idx = 0; + for (int64_t i = 0; i < in_size_at_dim; ++i) { + if (i != dim) { VK_CHECK_COND( - false, "Unexpected dim value=", dim, "for the input 4d tensor"); + graph.size_at(out_idx, out) == graph.size_at(i, in), + "Output size mismatch at dimension ", + out_idx, + " (expected ", + graph.size_at(i, in), + ", but got ", + graph.size_at(out_idx, out), + ")"); + out_idx++; } } +} - kernel_name.reserve(kShaderNameReserve); - add_dtype_suffix(kernel_name, *t_out); +/** + * Adds a select operation node to the compute graph. + * + * The select operator extracts a slice from a tensor along a specified + * dimension at a given index. It effectively reduces the dimensionality of the + * input tensor by one, by selecting a single slice at the specified index along + * the given dimension. For example, if input is a 3D tensor with shape [2,3,4] + * and we select dimension 1, index 2, the output will be a 2D tensor with shape + * [2,4]. + */ +void add_select_copy_node( + ComputeGraph& graph, + const ValueRef in, + const ValueRef dim_ref, + const ValueRef index_ref, + const ValueRef out) { + check_select_args(graph, in, dim_ref, index_ref, out); - // TODO: add resizing to support dynamic shapes. - graph.execute_nodes().emplace_back(new DispatchNode( + add_transfer_copy_node( graph, - VK_KERNEL_FROM_STR(kernel_name), - graph.create_global_wg_size(out), - graph.create_local_wg_size(out), - // Inputs and Outputs - {{out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}}, - // Parameter buffers - {t_out->logical_limits_ubo(), - t_out->sizes_ubo(), - // TODO: num_batches and num_texel_per_batch are provided by - // t_out->sizes. Can change the following to reduce params - // created. - graph.create_params_buffer( - utils::make_ivec4({index, num_batches, num_texel_per_batch, 0}))}, - // Push Constants - {}, - // Specialization Constants - {}, - // Resize Args - {}, - // Resizing Logic - nullptr)); + TransferType::SELECT, + in, + dim_ref, + index_ref, + kDummyValueRef, + kDummyValueRef, + out, + {dim_ref, index_ref}, + resize_select_node); } void select_int(ComputeGraph& graph, const std::vector& args) { - return add_select_int_node(graph, args[0], args[1], args[2], args[3]); + return add_select_copy_node(graph, args[0], args[1], args[2], args[3]); } REGISTER_OPERATORS { diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp index c40e16f7c0a..67d714d10aa 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp @@ -8,12 +8,10 @@ #include -#include - #include +#include #include -#include #include #include @@ -33,127 +31,73 @@ inline int64_t normalize_idx( return normalize(index, max); } -void add_slice_tensor_copy_node( - ComputeGraph& graph, - ValueRef in, - ValueRef dim_ref, - ValueRef opt_start_ref, - ValueRef opt_end_ref, - ValueRef step_ref, - ValueRef out) { - vTensorPtr t_in = graph.get_tensor(in); - vTensorPtr t_out = graph.get_tensor(out); - - VK_CHECK_COND(check_same_packed_dim(*t_in, *t_out)); - - // Need normalize the dim - int64_t dim = graph.extract_scalar(dim_ref); - - VK_CHECK_COND( - -t_in->dim() <= dim && dim < t_in->dim(), - "dim must be in range of [-self.dim(), self.dim()), but current dim's value is ", - dim, - " and self.dim() = ", - t_in->dim()); - - dim = normalize(dim, t_in->dim()); - - DimIndex dim_index = normalize_to_dim_index(*t_in, dim); +void resize_slice_copy_node( + ComputeGraph* graph, + const std::vector& args, + const std::vector& extra_args) { + ValueRef out_ref = args.at(0).refs.at(0); + ValueRef in_ref = args.at(1).refs.at(0); + int64_t dim = graph->extract_scalar(extra_args.at(0)); std::optional opt_start = - graph.extract_optional_scalar(opt_start_ref); + graph->extract_optional_scalar(extra_args.at(1)); std::optional opt_end = - graph.extract_optional_scalar(opt_end_ref); - int64_t step = graph.extract_scalar(step_ref); - - const auto in_sizes = t_in->sizes(); - const auto out_sizes = t_out->sizes(); - - int64_t start = opt_start.value_or(0); - int64_t end = opt_end.value_or(in_sizes[dim]); + graph->extract_optional_scalar(extra_args.at(2)); + int64_t step = graph->extract_scalar(extra_args.at(3)); - start = normalize_idx(start, in_sizes[dim], 0); - end = normalize_idx(end, in_sizes[dim], in_sizes[dim]); + // Normalize dim + if (dim < 0) { + dim += graph->dim_of(in_ref); + } - const vkapi::SpecVarList spec_vars = {t_in->packed_dim()}; + const std::vector in_sizes = graph->sizes_of(in_ref); + int64_t dim_size = in_sizes.at(dim); - const auto packed_dim_idx = - static_cast(DimIndex::DIM_LAST - t_in->packed_dim()); + int64_t start = opt_start.value_or(0); + int64_t end = opt_end.value_or(dim_size); - // if slice dim is the same as the packed dim, we can use the channel slice - if (dim_index == packed_dim_idx) { - // slice by channel - std::string kernel_name = "slice_packed_dim"; - kernel_name.reserve(kShaderNameReserve); - add_dtype_suffix(kernel_name, *t_out); + // Normalize start and end indices + start = normalize_idx(start, dim_size, 0); + end = normalize_idx(end, dim_size, dim_size); - const struct Block final { - int offset; - int step; - } params{ - static_cast(start), - static_cast(step), - }; + // Calculate output size + std::vector new_out_sizes = in_sizes; + new_out_sizes.at(dim) = (end - start + step - 1) / step; // Ceiling division - graph.execute_nodes().emplace_back(new DispatchNode( - graph, - VK_KERNEL_FROM_STR(kernel_name), - graph.create_global_wg_size(out), - graph.create_local_wg_size(out), - {{out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}}, - {t_out->sizes_ubo(), - t_in->sizes_ubo(), - graph.create_params_buffer(params)}, - {}, - spec_vars, - {}, - nullptr)); - - } else { - // GPU's coordinate is in x = 0, y = 1, z = 2, w = 3 - const int64_t gpu_dim = -(dim_index + 1); - // stride of input tensor's channel dimension - int64_t in_channel_stride = dim_at(in_sizes, kChannel4D); - VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step)); - - // Due to channel packing, each batch value is span over stride planes - if (dim_index == kBatch4D && packed_dim_idx == kChannel4D) { - in_channel_stride = utils::div_up_4(in_channel_stride); - } + graph->virtual_resize(out_ref, new_out_sizes); +} - std::string kernel_name = "slice_unpacked_dim"; - kernel_name.reserve(kShaderNameReserve); - add_dtype_suffix(kernel_name, *t_out); - - utils::uvec3 global_size = t_out->logical_limits(); - utils::uvec3 local_size = graph.create_local_wg_size(global_size); - - const struct Block final { - int dim; - int offset; - int step; - int stride; - } params{ - static_cast(gpu_dim), - static_cast(start), - static_cast(step), - static_cast(in_channel_stride), - }; - - graph.execute_nodes().emplace_back(new DispatchNode( - graph, - VK_KERNEL_FROM_STR(kernel_name), - global_size, - local_size, - {{out, vkapi::MemoryAccessType::WRITE}, - {in, vkapi::MemoryAccessType::READ}}, - {t_out->sizes_ubo(), graph.create_params_buffer(params)}, - {}, - spec_vars, - {}, - nullptr)); - } +/** + * Adds a slice_copy operation node to the compute graph. + * + * The slice operator extracts a portion of a tensor along a specified + * dimension. It creates a new tensor that contains a subset of the input + * tensor's data, defined by start, end, and step parameters along the given + * dimension. + * + * For example, if input is a tensor with shape [4,5,6] and we slice along + * dimension 1 with start=1, end=4, step=2, the output will have shape [4,2,6], + * containing elements from the input at positions 1 and 3 along dimension 1. + */ +void add_slice_copy_node( + ComputeGraph& graph, + ValueRef in, + ValueRef dim_ref, + ValueRef opt_start_ref, + ValueRef opt_end_ref, + ValueRef step_ref, + ValueRef out) { + add_transfer_copy_node( + graph, + TransferType::SLICE, + in, + dim_ref, + opt_start_ref, + opt_end_ref, + step_ref, + out, + {dim_ref, opt_start_ref, opt_end_ref, step_ref}, + resize_slice_copy_node); } std::vector get_slice_sizes( @@ -186,16 +130,16 @@ void resize_slice_view_node( const std::vector& args, const std::vector& extra_args) { (void)args; - vTensorPtr out = graph->get_tensor(extra_args[0]); + ValueRef out_ref = extra_args.at(0); std::vector new_out_sizes = get_slice_sizes( *graph, - extra_args[1], // input - extra_args[2], // dim - extra_args[3], // optional start - extra_args[4]); // optional end + extra_args.at(1), // input + extra_args.at(2), // dim + extra_args.at(3), // optional start + extra_args.at(4)); // optional end - out->virtual_resize(new_out_sizes); + graph->virtual_resize(out_ref, new_out_sizes); } void check_slice_view_args( @@ -267,54 +211,54 @@ void add_slice_view_node( std::vector new_out_sizes = get_slice_sizes(graph, in_ref, dim_ref, opt_start_ref, opt_end_ref); - graph.get_tensor(out_ref)->virtual_resize(new_out_sizes); + graph.virtual_resize(out_ref, new_out_sizes); graph.execute_nodes().emplace_back(new ExecuteNode( resize_slice_view_node, {out_ref, in_ref, dim_ref, opt_start_ref, opt_end_ref, opt_step_ref})); } -void slice_tensor_copy(ComputeGraph& graph, const std::vector& args) { - return add_slice_tensor_copy_node( +void slice_copy(ComputeGraph& graph, const std::vector& args) { + return add_slice_copy_node( graph, - args[0], - args[1], // dim - args[2], // optional start - args[3], // optional end - args[4], // step - args[5]); + args.at(0), + args.at(1), // dim + args.at(2), // optional start + args.at(3), // optional end + args.at(4), // step + args.at(5)); } -void slice_tensor(ComputeGraph& graph, const std::vector& args) { - ValueRef in = args[0]; - ValueRef out = args[5]; +void slice(ComputeGraph& graph, const std::vector& args) { + ValueRef in = args.at(0); + ValueRef out = args.at(5); // Special case if out is a view of in if (graph.val_is_view_of(out, in)) { add_slice_view_node( graph, in, - args[1], // dim - args[2], // optional start - args[3], // optional end - args[4], // step + args.at(1), // dim + args.at(2), // optional start + args.at(3), // optional end + args.at(4), // step out); return; } - add_slice_tensor_copy_node( + add_slice_copy_node( graph, in, - args[1], // dim - args[2], // optional start - args[3], // optional end - args[4], // step + args.at(1), // dim + args.at(2), // optional start + args.at(3), // optional end + args.at(4), // step out); } REGISTER_OPERATORS { - VK_REGISTER_OP(aten.slice_copy.Tensor, slice_tensor_copy); - VK_REGISTER_OP(aten.slice.Tensor, slice_tensor); + VK_REGISTER_OP(aten.slice_copy.Tensor, slice_copy); + VK_REGISTER_OP(aten.slice.Tensor, slice); } } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp new file mode 100644 index 00000000000..423c9789d67 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +#include +#include +#include + +namespace vkcompute { + +/** + * Adds a transfer copy operation node to the compute graph. + * This function handles both SELECT and SLICE operations based on the + * transfer_type parameter. + */ +void add_transfer_copy_node( + ComputeGraph& graph, + TransferType transfer_type, + const ValueRef in, + const ValueRef dim_ref, + const ValueRef index_or_start_ref, + const ValueRef end_ref, + const ValueRef step_ref, + const ValueRef out, + const std::vector& resize_args, + const ExecuteNode::ResizeFunction& resize_fn) { + int64_t ndim = graph.dim_of(in); + int64_t dim = graph.extract_scalar(dim_ref); + + if (dim < 0) { + dim += ndim; + } + + int64_t dim_whcn = nchw_dim_to_whcn_dim(dim, ndim); + + vkapi::ParamsBindList param_buffers; + if (transfer_type == TransferType::SELECT) { + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0)}; + } else { // TransferType::SLICE + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0), + graph.get_or_create_int_param_buffer(step_ref, 1)}; + } + + const struct TransferParams { + const int32_t dim; + } transfer_params{static_cast(dim_whcn)}; + + std::vector push_constants; + vkapi::SpecVarList spec_vars; + + if (graph.is_buffer_storage(out)) { + push_constants = { + graph.sizes_pc_of(in), + graph.strides_pc_of(out), + graph.strides_pc_of(in), + graph.numel_pc_of(out), + PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + + spec_vars = { + graph.packed_dim_of(out), + graph.packed_dim_of(in), + }; + } else { + push_constants = { + graph.sizes_pc_of(out), + graph.sizes_pc_of(in), + PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + + spec_vars = { + graph.hashed_layout_of(out), + graph.hashed_layout_of(in), + }; + } + + // Determine the shader directly + std::string kernel_name; + if (transfer_type == TransferType::SELECT) { + kernel_name = "select"; + } else { // TransferType::SLICE + kernel_name = "slice"; + } + add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); + add_dtype_suffix(kernel_name, graph.dtype_of(out)); + + // Create and add the dispatch node + graph.execute_nodes().emplace_back(new DynamicDispatchNode( + graph, + VK_KERNEL_FROM_STR(kernel_name), + default_pick_global_wg_size, + default_pick_local_wg_size, + // Inputs and Outputs + {{out, vkapi::kWrite}, {in, vkapi::kRead}}, + // Parameter buffers + param_buffers, + // Push Constants + push_constants, + // Specialization Constants + spec_vars, + // Resize Args + resize_args, + // Resizing Logic + resize_fn)); +} + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/impl/Transfer.h b/backends/vulkan/runtime/graph/ops/impl/Transfer.h new file mode 100644 index 00000000000..09aae144994 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/impl/Transfer.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace vkcompute { + +enum class TransferType { SELECT, SLICE }; + +/** + * Adds a transfer copy operation node to the compute graph, which implements + * operators for which each element of the output tensor maps to a unique + * element of the input tensor. + * + * This function currently handles the following operations: + * - select + * - slice + */ +void add_transfer_copy_node( + ComputeGraph& graph, + TransferType transfer_type, + const ValueRef in, + const ValueRef dim_ref, + const ValueRef index_or_start_ref, + const ValueRef end_ref, + const ValueRef step_ref, + const ValueRef out, + const std::vector& resize_args, + const ExecuteNode::ResizeFunction& resize_fn = nullptr); + +} // namespace vkcompute diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py index 40e86a13b42..bf6e9683ef7 100644 --- a/backends/vulkan/test/op_tests/cases.py +++ b/backends/vulkan/test/op_tests/cases.py @@ -499,7 +499,9 @@ def get_ones_inputs(): def get_select_int_inputs(): test_suite = VkTestSuite( [ - ((6, 2, 7), 0, 3), + ((8, 8, 8), 0, -2), + ((8, 8, 8), 1, -3), + ((8, 8, 8), 2, -4), ((6, 2, 7), 1, 0), ((6, 2, 7), 2, 3), ((6, 10, 7), 0, 3), @@ -515,6 +517,10 @@ def get_select_int_inputs(): ((8, 6, 1, 1), 1, 4), ] ) + test_suite.layouts = ["utils::kWidthPacked", "utils::kChannelsPacked"] + test_suite.storage_types = ["utils::kBuffer", "utils::kTexture3D"] + test_suite.dtypes = ["at::kFloat"] + test_suite.data_gen = "make_seq_tensor" return test_suite diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py index 80ead02de9f..447e5d039f4 100644 --- a/backends/vulkan/test/test_vulkan_delegate.py +++ b/backends/vulkan/test/test_vulkan_delegate.py @@ -1842,3 +1842,50 @@ def forward(self, x): dynamic_shapes=dynamic_shapes, test_inputs=test_inputs, ) + + def test_select_last_height_dynamic_shapes(self): + """ + Test selecting the last element along the height dimension with dynamic shapes. + The height dimension (dim=1) is variable. + """ + + class SelectLastHeightModule(torch.nn.Module): + """ + Module that selects the last element along the height dimension (dim=1) of a 3D tensor. + This is equivalent to the operation: x[:, -1, :] + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + # Select the last element along dimension 1 (height) + return x[:, -1, :] + + # Create the module + module = SelectLastHeightModule() + + # Create sample inputs with a specific shape + # Shape: [batch_size, height, width] + sample_inputs = (torch.arange(1, 61).reshape(2, 10, 3).float(),) + + # Define dynamic shapes for the height dimension + height = Dim("height", min=1, max=10) + dynamic_shapes = {"x": {1: height}} + + # Create test inputs with different heights + test_inputs = [ + (torch.arange(1, 7).reshape(2, 1, 3).float(),), # Minimum height + (torch.arange(1, 19).reshape(2, 3, 3).float(),), # Small height + (torch.arange(1, 43).reshape(2, 7, 3).float(),), # Medium height + (torch.arange(1, 31).reshape(2, 5, 3).float(),), # Maximum height + ] + + # Use the testing infrastructure from TestVulkanBackend + test_backend = TestVulkanBackend() + test_backend.lower_module_and_test_output( + module, + sample_inputs, + dynamic_shapes=dynamic_shapes, + test_inputs=test_inputs, + ) diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index a6475d95d07..60dfb3b8606 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -1660,9 +1661,8 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { for (auto& new_sizes : new_sizes_list) { graph.get_tensor(a.value)->virtual_resize(new_sizes); graph.get_tensor(b.value)->virtual_resize(new_sizes); - graph.get_tensor(c)->virtual_resize(new_sizes); graph.get_tensor(d.value)->virtual_resize(new_sizes); - graph.get_tensor(e)->virtual_resize(new_sizes); + graph.propagate_resize(); float val_a = new_sizes[1] + 4.0f; float val_b = new_sizes[2] + 1.5f; @@ -3315,17 +3315,23 @@ vkapi::ShaderInfo pick_dynamic_dispatch_shader( utils::uvec3 pick_dynamic_dispatch_global_wg_size( ComputeGraph* graph, + const vkapi::ShaderInfo& shader, const std::vector& args, - const std::vector& additional_args) { + const std::vector& resize_args) { + (void)shader; const ValueRef out = args[0].refs[0]; - return graph->logical_limits_of(out); } utils::uvec3 pick_dynamic_dispatch_local_wg_size( ComputeGraph* graph, + const vkapi::ShaderInfo& shader, + const utils::uvec3& global_workgroup_size, const std::vector& args, - const std::vector& additional_args) { + const std::vector& resize_args) { + (void)graph; + (void)shader; + (void)global_workgroup_size; return {64, 1, 1}; }