Skip to content

Commit f981c58

Browse files
committed
Update on "[ET-VK] 5/n Split dispatches between multiple command buffers. Track previously submitted command buffers in context and add function to execute all previous command buffers."
This diff stores command buffers that are submitted with `final_use` set to false. Storing these buffers is necessary for the `execute()` function: since `encode_execute()` is typically called once but `execute()` can be called multiple times, the `submit_all_non_final_cmds` function is added so that all recorded command buffers with `final_use = false` can be resubmitted on every call to `execute()`.

#### Key Changes

* Added a flag `execute_pending_first_submission` to the `ComputeGraph` class to track whether execute nodes have been freshly encoded and need to be submitted first.
* Added a new function `submit_all_non_final_cmds` to the `Context` class, which submits all non-final command buffers to the GPU.
* Modified the `submit_cmd_to_gpu` function to add the submitted command buffer to the `non_final_cmds_` list if it is not marked as final use.
* Updated the `execute` function in `ComputeGraph` to submit all non-final command buffers before executing the graph.

Differential Revision: [D78360038](https://our.internmc.facebook.com/intern/diff/D78360038/) [ghstack-poisoned]
2 parents 2b3dec5 + b7c5cab commit f981c58

File tree

4 files changed

+42
-52
lines changed

4 files changed

+42
-52
lines changed

backends/vulkan/runtime/api/Context.cpp

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -214,44 +214,14 @@ void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
214214

215215
submit_count_ = 0u;
216216
}
217-
218-
if (!final_use) {
219-
non_final_cmds_.emplace_back(std::move(cmd_));
220-
}
221-
}
222-
223-
void Context::submit_all_non_final_cmds(VkFence fence_handle) {
224-
VkSemaphore local_prev_semaphore = VK_NULL_HANDLE;
225-
226-
for (uint32_t i = 0; i < non_final_cmds_.size(); i++) {
227-
auto& cmd = non_final_cmds_[i];
228-
VkSemaphore wait_semaphore = local_prev_semaphore;
229-
VkSemaphore signal_semaphore = cmd.get_signal_semaphore();
230-
local_prev_semaphore = signal_semaphore;
231-
232-
if (cmd) {
233-
cmd.end();
234-
adapter_p_->submit_cmd(
235-
queue_,
236-
cmd.get_submit_handle(false),
237-
i == (non_final_cmds_.size() - 1) ? fence_handle : VK_NULL_HANDLE,
238-
wait_semaphore,
239-
signal_semaphore);
240-
}
241-
}
242217
}
243218

244219
void Context::flush() {
245-
VK_CHECK(vkQueueWaitIdle(queue()));
220+
VK_CHECK(vkQueueWaitIdle(queue().handle));
246221

247222
command_pool_.flush();
248223
descriptor_pool_.flush();
249224

250-
for (auto& cmd : non_final_cmds_) {
251-
cmd.invalidate();
252-
}
253-
non_final_cmds_.clear();
254-
255225
// If there is an existing command buffer, invalidate it
256226
if (cmd_) {
257227
cmd_.invalidate();

backends/vulkan/runtime/api/Context.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,6 @@ class Context final {
6868
// Command buffers submission
6969
std::mutex cmd_mutex_;
7070
vkapi::CommandBuffer cmd_;
71-
// List of submitted command buffers, not marked as final use.
72-
std::vector<vkapi::CommandBuffer> non_final_cmds_;
7371
// Semaphore for the previously submitted command buffer, if any
7472
VkSemaphore prev_semaphore_;
7573
uint32_t submit_count_;
@@ -92,8 +90,8 @@ class Context final {
9290
return device_;
9391
}
9492

95-
inline VkQueue queue() {
96-
return queue_.handle;
93+
inline vkapi::Adapter::Queue& queue() {
94+
return queue_;
9795
}
9896

9997
// Device Caches
@@ -232,7 +230,9 @@ class Context final {
232230
VkFence fence_handle = VK_NULL_HANDLE,
233231
const bool final_use = false);
234232

235-
void submit_all_non_final_cmds(VkFence fence_handle = VK_NULL_HANDLE);
233+
vkapi::CommandBuffer& extract_cmd() {
234+
return cmd_;
235+
}
236236

237237
void flush();
238238

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ ComputeGraph::~ComputeGraph() {
158158

159159
prepack_nodes_.clear();
160160
execute_nodes_.clear();
161+
deferred_cmd_list_.clear();
161162

162163
context_->flush();
163164
}
@@ -767,6 +768,30 @@ void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
767768
context_->fences().return_fence(fence);
768769
}
769770

771+
void ComputeGraph::submit_deferred_cmds() {
772+
VkSemaphore prev_semaphore = VK_NULL_HANDLE;
773+
vkapi::VulkanFence fence = context_->fences().get_fence();
774+
775+
for (uint32_t i = 0; i < deferred_cmd_list_.size(); i++) {
776+
auto& cmd = deferred_cmd_list_[i];
777+
VkSemaphore wait_semaphore = prev_semaphore;
778+
VkSemaphore signal_semaphore = cmd.get_signal_semaphore();
779+
prev_semaphore = signal_semaphore;
780+
781+
if (cmd) {
782+
cmd.end();
783+
context_->adapter_ptr()->submit_cmd(
784+
context_->queue(),
785+
cmd.get_submit_handle(false),
786+
i == (deferred_cmd_list_.size() - 1) ? fence.get_submit_handle() : VK_NULL_HANDLE,
787+
wait_semaphore,
788+
signal_semaphore);
789+
}
790+
}
791+
fence.wait();
792+
context_->fences().return_fence(fence);
793+
}
794+
770795
void ComputeGraph::prepack() {
771796
int i = 0;
772797
bool submitted = false;
@@ -805,6 +830,7 @@ void ComputeGraph::prepack() {
805830
}
806831

807832
void ComputeGraph::encode_execute() {
833+
deferred_cmd_list_.clear();
808834
context_->flush();
809835
context_->set_cmd(/*reusable = */ true);
810836

@@ -814,21 +840,11 @@ void ComputeGraph::encode_execute() {
814840
node->encode(this);
815841
}
816842

817-
// Indicate execute nodes have been freshly encoded and needs to be submitted
818-
// first
819-
execute_pending_first_submission = true;
843+
deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
820844
}
821845

822846
void ComputeGraph::execute() {
823-
if (execute_pending_first_submission) {
824-
submit_current_cmd_and_wait(/*final_use=*/false);
825-
execute_pending_first_submission = false;
826-
} else {
827-
vkapi::VulkanFence fence = context_->fences().get_fence();
828-
context_->submit_all_non_final_cmds(fence.get_submit_handle());
829-
fence.wait();
830-
context_->fences().return_fence(fence);
831-
}
847+
submit_deferred_cmds();
832848
execute_count_++;
833849
}
834850

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@ class ComputeGraph final {
193193
// Utility constexpr to express byte quantities
194194
constexpr static size_t MB = 1024 * 1024;
195195

196+
// List of command buffers deferred for submission
197+
std::vector<vkapi::CommandBuffer> deferred_cmd_list_;
198+
196199
protected:
197200
size_t values_in_use_ = 0;
198201
size_t execute_count_ = 0;
@@ -204,10 +207,6 @@ class ComputeGraph final {
204207
// current Context's command buffer is submitted now.
205208
size_t staging_nbytes_in_cmd_ = 0;
206209

207-
// Flag to indicate if execute nodes have been freshly encoded and have not
208-
// been submitted yet.
209-
bool execute_pending_first_submission = false;
210-
211210
public:
212211
//
213212
// Accessors
@@ -855,6 +854,11 @@ class ComputeGraph final {
855854
*/
856855
void submit_current_cmd_and_wait(const bool final_use = false);
857856

857+
/*
858+
* Submits all the command buffers gathered in deferred_cmd_list_ to the GPU.
859+
*/
860+
void submit_deferred_cmds();
861+
858862
public:
859863
//
860864
// Graph Prepacking

0 commit comments

Comments
 (0)