From 249984cb44c93137579dfbdf0baa67be1cf75e6d Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:05:07 -0700 Subject: [PATCH] [ET-VK] 6/n Split dispatches between multiple command buffers. Repurpose submit_current_cmd_and_wait to wait based on input flag. This diff makes changes to the `submit_current_cmd_and_wait` function in the `ComputeGraph` class to repurpose it to wait for command buffer completion based on an input flag. The function is renamed to `submit_current_cmd` and now takes an additional `wait` parameter. If `wait` is `true`, the function submits the command buffer to the GPU and waits for its completion. Otherwise, it only submits the command buffer without waiting. Differential Revision: [D78360042](https://our.internmc.facebook.com/intern/diff/D78360042/) [ghstack-poisoned] --- .../vulkan/runtime/graph/ComputeGraph.cpp | 29 ++++++++++--------- backends/vulkan/runtime/graph/ComputeGraph.h | 6 ++-- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 8914f49a8ea..74931ce2dd7 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -759,11 +759,16 @@ void ComputeGraph::prepare_pipelines() { vkapi::ComputePipelineCache::Hasher>(); } -void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) { - vkapi::VulkanFence fence = context_->fences().get_fence(); - context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use); - fence.wait(); - context_->fences().return_fence(fence); +void ComputeGraph::submit_current_cmd(const bool final_use, bool wait) { + if (wait) { + // Submit and wait for command buffer + vkapi::VulkanFence fence = context_->fences().get_fence(); + context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use); + fence.wait(); + context_->fences().return_fence(fence); + } else { + 
context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use); + } } void ComputeGraph::encode_prepack() { @@ -772,13 +777,9 @@ void ComputeGraph::encode_prepack() { } } -void ComputeGraph::prepack() const { +void ComputeGraph::prepack() { // Submit and execute the command buffer - vkapi::VulkanFence fence = context_->fences().get_fence(); - context_->submit_cmd_to_gpu(fence.get_submit_handle(), /*final_use = */ true); - fence.wait(); - context_->fences().return_fence(fence); - + submit_current_cmd(/*final_use = */ true, /*wait = */ true); context_->flush(); } @@ -791,7 +792,7 @@ void ComputeGraph::run_prepack() { size_t threshold = submitted ? config_.prepack_threshold_nbytes : config_.prepack_initial_threshold_nbytes; if (not_terminal && staging_nbytes_in_cmd_ > threshold) { - submit_current_cmd_and_wait(/*final_use=*/true); + submit_current_cmd(/*final_use=*/true, /*wait=*/true); context_->flush(); staging_nbytes_in_cmd_ = 0; context_->set_cmd(); @@ -801,7 +802,7 @@ void ComputeGraph::run_prepack() { node->encode(this); i++; } - submit_current_cmd_and_wait(/*final_use=*/true); + submit_current_cmd(/*final_use=*/true, /*wait=*/true); context_->flush(); staging_nbytes_in_cmd_ = 0; } @@ -823,7 +824,7 @@ void ComputeGraph::encode_execute() { void ComputeGraph::execute() { if (execute_pending_first_submission) { - submit_current_cmd_and_wait(/*final_use=*/false); + submit_current_cmd(/*final_use=*/false, /*wait=*/true); execute_pending_first_submission = false; } else { vkapi::VulkanFence fence = context_->fences().get_fence(); diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 763aa42d274..3a7ac0d4f50 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -825,9 +825,9 @@ class ComputeGraph final { /* * Submits the current command buffer in the Context to the GPU for execution, - * and wait for it to complete before returning. 
+ * and waits for it to complete before returning if wait is true. */ - void submit_current_cmd_and_wait(const bool final_use = false); + void submit_current_cmd(const bool final_use = false, bool wait = true); public: // @@ -839,7 +839,7 @@ class ComputeGraph final { } void encode_prepack(); - void prepack() const; + void prepack(); /* * Executes prepacking operations to transfer model weight data from the CPU