diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 14328027362..7775165bc68 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -151,6 +151,10 @@ ComputeGraph::ComputeGraph(GraphConfig config) config_.prepack_threshold_nbytes = 10 * MB; config_.prepack_initial_threshold_nbytes = 10 * MB; } + if (config_.execute_threshold_node_count == 0) { + config_.execute_threshold_node_count = 128; + config_.execute_initial_threshold_node_count = 64; + } } ComputeGraph::~ComputeGraph() { @@ -852,15 +856,38 @@ void ComputeGraph::execute() { context_->set_cmd(/*reusable = */ true); context_->cmd_reset_querypool(); + uint32_t encoded_node_count = 0; for (std::unique_ptr& node : execute_nodes_) { node->encode(this); + encoded_node_count++; + + // Threshold is reached when the node count reaches + // execute_initial_threshold_node_count, and then again after every + // execute_threshold_node_count additional nodes. 
+ const bool reached_threshold = + encoded_node_count >= config_.execute_initial_threshold_node_count && + ((encoded_node_count - config_.execute_initial_threshold_node_count) % + config_.execute_threshold_node_count == + 0); + + // Create a new command buffer when threshold is reached + if (reached_threshold) { + context_->submit_cmd_to_gpu(VK_NULL_HANDLE, false); + deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd())); + context_->set_cmd(true); + } } + vkapi::VulkanFence fence = context_->fences().get_fence(); + context_->submit_cmd_to_gpu(fence.get_submit_handle(), false); + fence.wait(); + context_->fences().return_fence(fence); deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd())); + } else { + submit_deferred_cmds_and_wait(); } - submit_deferred_cmds_and_wait(); execute_count_++; } diff --git a/backends/vulkan/runtime/graph/GraphConfig.h b/backends/vulkan/runtime/graph/GraphConfig.h index 33c7ae73e62..08505aa3345 100644 --- a/backends/vulkan/runtime/graph/GraphConfig.h +++ b/backends/vulkan/runtime/graph/GraphConfig.h @@ -50,6 +50,17 @@ struct GraphConfig final { // by taking more advantage of parallelism between the CPU and GPU. size_t prepack_initial_threshold_nbytes = 0; + // During execute, once this node count is reached, submit the current + // command buffer for execution. This allows the work to be distributed over + // multiple command buffer submissions, which can improve execution + // performance. + size_t execute_threshold_node_count = 0; + // Execute node count used for the first command buffer submission during + // execute. This can be set to be lower than execute_threshold_node_count to + // submit a command buffer for execution earlier which can improve performance + // by taking more advantage of parallelism between the CPU and GPU. + size_t execute_initial_threshold_node_count = 0; + vkapi::Adapter* external_adapter; // Generate a default graph config with pre-configured settings