From 85726c49e8f4439ac5207ca734961dac345d50a8 Mon Sep 17 00:00:00 2001 From: Stephen Jia Date: Fri, 30 May 2025 08:25:58 -0700 Subject: [PATCH 1/2] [ET-VK][ez] Enable dynamic shape support when using push constants ## Changes * Call `encode_execute()` upon resize in `VulkanBackend.cpp` * Minor update to `DispatchNode` to store push constant data array as a persistent member of the class ## Motivation Passing in tensor metadata (i.e. sizes, strides) via push constants is typically more performant than passing them via a UBO (uniform buffer object). However, currently dynamic shapes do not work when push constants are used as I realized that the tensor metadata contained in the push constants do not get updated. It appears that that `vkCmdPushConstants` sets the push constants when encoding the command buffer, however the push constants will not be updated if the command buffer is submitted for execution multiple times. Therefore, to update push constant values **the command buffer needs to be re-encoded**. ## Performance Impact This may add a small performance overhead (i.e. re-encoding the command buffer) when executing models with dynamic shapes. Models that do not trigger tensor resizing will not be impacted. However, I measured the impact on a llama 3.2 1B model and the impact of re-encoding a command buffer appears to be negligible. In any case, re-encoding the command buffer is a "necessary evil" when working with dynamic shapes, otherwise the tensor metadata seen by shaders may never get updated. Furthermore, re-encoding the command buffer can allow an opportunity to adjust global work group sizing to match current tensor sizes, which may have a huge performance impact when maximum tensor sizes far exceeds what tensor sizes will realistically be during inference (one instance of this is for transformer models when the max sequence length is very long). Differential Revision: [D75686051](https://our.internmc.facebook.com/intern/diff/D75686051/) [ghstack-poisoned] --- backends/vulkan/runtime/VulkanBackend.cpp | 1 + .../vulkan/runtime/graph/ops/DispatchNode.cpp | 30 +++++++++++-------- .../vulkan/runtime/graph/ops/DispatchNode.h | 6 ++++ .../vulkan/runtime/graph/ops/ExecuteNode.h | 2 +- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index b32f4eb4308..4a3d077bfb7 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -569,6 +569,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { if (should_propagate_resize) { compute_graph->propagate_resize(); + compute_graph->encode_execute(); } compute_graph->execute(); diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp index 51ff0c122b0..09f5372fb8b 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp @@ -46,15 +46,7 @@ void DispatchNode::encode(ComputeGraph* graph) { std::unique_lock cmd_lock = context->dispatch_lock(); - std::array push_constants_data; - uint32_t push_constants_offset = 0; - - for (const auto& push_constant : push_constants_) { - push_constants_offset += push_constant.write( - push_constants_data.data(), - push_constants_offset, - kMaxPushConstantSize); - } + write_push_constant_data(); context->report_shader_dispatch_start( shader_.kernel_name, @@ -63,7 +55,7 @@ void DispatchNode::encode(ComputeGraph* graph) { node_id_); vkapi::DescriptorSet descriptor_set = context->get_descriptor_set( - shader_, local_workgroup_size_, spec_vars_, push_constants_offset); + shader_, local_workgroup_size_, spec_vars_, push_constants_offset_); uint32_t idx = 0; idx = bind_values_to_descriptor_set( @@ -76,10 +68,24 @@ void DispatchNode::encode(ComputeGraph* graph) { pipeline_barrier, shader_, global_workgroup_size_, - push_constants_data.data(), - push_constants_offset); + push_constants_data_.data(), + push_constants_offset_); context->report_shader_dispatch_end(); } +void DispatchNode::write_push_constant_data() { + if (push_constants_.empty()) { + return; + } + + push_constants_offset_ = 0; + for (const auto& push_constant : push_constants_) { + push_constants_offset_ += push_constant.write( + push_constants_data_.data(), + push_constants_offset_, + kMaxPushConstantSize); + } +} + } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h index c45f0a741fd..d6d5300804d 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h @@ -50,6 +50,12 @@ class DispatchNode : public ExecuteNode { const vkapi::SpecVarList spec_vars_; const std::vector push_constants_; + // For push constants + std::array push_constants_data_; + uint32_t push_constants_offset_ = 0; + + void write_push_constant_data(); + public: operator bool() const { return shader_; diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h index 7563fc63c71..0731722e13a 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h @@ -65,7 +65,7 @@ class ExecuteNode { (void)graph; } - inline void trigger_resize(ComputeGraph* graph) { + virtual inline void trigger_resize(ComputeGraph* graph) { if (resize_fn_ != nullptr) { resize_fn_(graph, args_, resize_args_); } From e4b080ae60953950002acf16ccd1dff18c230df7 Mon Sep 17 00:00:00 2001 From: Stephen Jia Date: Fri, 30 May 2025 08:59:36 -0700 Subject: [PATCH 2/2] Update on "[ET-VK][ez] Enable dynamic shape support when using push constants" ## Changes * Call `encode_execute()` upon resize in `VulkanBackend.cpp` * Minor update to `DispatchNode` to store push constant data array as a persistent member of the class ## Motivation Passing in tensor metadata (i.e. sizes, strides) via push constants is typically more performant than passing them via a UBO (uniform buffer object). However, currently dynamic shapes do not work when push constants are used as I realized that the tensor metadata contained in the push constants do not get updated. It appears that that `vkCmdPushConstants` sets the push constants when encoding the command buffer, however the push constants will not be updated if the command buffer is submitted for execution multiple times. Therefore, to update push constant values **the command buffer needs to be re-encoded**. ## Performance Impact This may add a small performance overhead (i.e. re-encoding the command buffer) when executing models with dynamic shapes. Models that do not trigger tensor resizing will not be impacted. However, I measured the impact on a llama 3.2 1B model and the impact of re-encoding a command buffer appears to be negligible. In any case, re-encoding the command buffer is a "necessary evil" when working with dynamic shapes, otherwise the tensor metadata seen by shaders may never get updated. Furthermore, re-encoding the command buffer can allow an opportunity to adjust global work group sizing to match current tensor sizes, which may have a huge performance impact when maximum tensor sizes far exceeds what tensor sizes will realistically be during inference (one instance of this is for transformer models when the max sequence length is very long). Differential Revision: [D75686051](https://our.internmc.facebook.com/intern/diff/D75686051/) [ghstack-poisoned] --- backends/vulkan/runtime/VulkanBackend.cpp | 1 - backends/vulkan/runtime/graph/ComputeGraph.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp index 4a3d077bfb7..b32f4eb4308 100644 --- a/backends/vulkan/runtime/VulkanBackend.cpp +++ b/backends/vulkan/runtime/VulkanBackend.cpp @@ -569,7 +569,6 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface { if (should_propagate_resize) { compute_graph->propagate_resize(); - compute_graph->encode_execute(); } compute_graph->execute(); diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index a4a6abdd63f..ce9c700057a 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -696,6 +696,7 @@ void ComputeGraph::propagate_resize() { for (std::unique_ptr& node : execute_nodes_) { node->trigger_resize(this); } + encode_execute(); } } // namespace vkcompute