Skip to content

Commit 9a638b0

Browse files
committed
[ET-VK] 8/n Split dispatches between multiple command buffers. This diff adds a config to limit the maximum number of command buffers created when splitting execution between multiple command buffers.
This diff introduces a new configuration option, `execute_max_cmds`, to limit the maximum number of command buffers created when splitting execution between multiple command buffers. This feature allows for more efficient management of command buffers, particularly in scenarios where the number of nodes in the graph is large. Differential Revision: [D79575908](https://our.internmc.facebook.com/intern/diff/D79575908/) [ghstack-poisoned]
1 parent 98c3a0f commit 9a638b0

File tree

2 files changed

+44
-6
lines changed

2 files changed

+44
-6
lines changed

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -858,21 +858,55 @@ void ComputeGraph::execute() {
858858
context_->cmd_reset_querypool();
859859
uint32_t encoded_node_count = 0;
860860

861+
const size_t total_node_count = execute_nodes_.size();
862+
size_t init_threshold = config_.execute_initial_threshold_node_count;
863+
size_t count_threshold = config_.execute_threshold_node_count;
864+
865+
// If max command buffer count is set, we need to adjust the thresholds to
866+
// accommodate execution within the limit, if total command buffers with
867+
// current thresholds would exceed execute_max_cmds
868+
if (config_.execute_max_cmds > 0) {
869+
// Worst case scenario we have one command buffer for nodes before init
870+
// threshold and config_.execute_max_cmds - 1 command buffers for the rest
871+
// of dispatches
872+
873+
// If command buffers created after offsetting init_threshold would exceed
874+
// max command buffer count, we need to adjust init and count thresholds
875+
const bool slicing_exceeds_max_cmds =
876+
(total_node_count - init_threshold) >
877+
count_threshold * (config_.execute_max_cmds - 1);
878+
if (total_node_count > init_threshold && slicing_exceeds_max_cmds) {
879+
// Calculate the total number of commands that would be created if we
880+
// were to slice the nodes using current thresholds
881+
const double total_cmds = 1. +
882+
ceil(double(total_node_count - init_threshold) / count_threshold);
883+
884+
// Calculate the scale factor to apply to the thresholds to ensure that
885+
// the total number of commands does not exceed max command buffer count
886+
const double cmd_scale = total_cmds / config_.execute_max_cmds;
887+
888+
// Apply the scale factor to the thresholds
889+
init_threshold = static_cast<size_t>(ceil(init_threshold * cmd_scale));
890+
count_threshold = static_cast<size_t>(ceil(
891+
double(total_node_count - init_threshold) /
892+
(config_.execute_max_cmds - 1)));
893+
}
894+
}
895+
861896
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
862897
node->encode(this);
863898
encoded_node_count++;
864899

865900
// Threshold is reached when the node count reaches
866901
// execute_initial_threshold_node_count or if it's a multiple of
867902
// execute_threshold_node_count.
868-
const bool reached_threshold =
869-
encoded_node_count >= config_.execute_initial_threshold_node_count &&
870-
((encoded_node_count - config_.execute_initial_threshold_node_count) %
871-
config_.execute_threshold_node_count ==
872-
0);
903+
const bool reached_threshold = encoded_node_count >= init_threshold &&
904+
((encoded_node_count - init_threshold) % count_threshold == 0);
873905

874906
// Create a new command buffer when threshold is reached
875-
if (reached_threshold) {
907+
// But avoid it if this is the last node, since last cmd buf is submitted
908+
// after the loop
909+
if (reached_threshold && encoded_node_count != total_node_count) {
876910
context_->submit_cmd_to_gpu(VK_NULL_HANDLE, false);
877911
deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
878912
context_->set_cmd(true);

backends/vulkan/runtime/graph/GraphConfig.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ struct GraphConfig final {
6161
// by taking more advantage of parallelism between the CPU and GPU.
6262
size_t execute_initial_threshold_node_count = 0;
6363

64+
// If this number is greater than 0, then during execute create at most this
65+
// many command buffers.
66+
size_t execute_max_cmds = 0;
67+
6468
vkapi::Adapter* external_adapter;
6569

6670
// Generate a default graph config with pre-configured settings

0 commit comments

Comments
 (0)