@@ -768,6 +768,13 @@ void ComputeGraph::submit_current_cmd(const bool final_use, bool wait) {
768768 }
769769}
770770
// New helper added by this patch (every line carries the '+' marker).
// If encode_execute_fence_ tests true, waits on it and then passes it to
// context_->fences().return_fence(); otherwise does nothing.
771+ void ComputeGraph::wait_on_encode_execute () {
772+ if (encode_execute_fence_) {
773+ encode_execute_fence_.wait ();
// NOTE(review): this helper is called from both encode_execute() and
// execute(). The guard above only prevents a second wait/return of the same
// fence if return_fence() leaves encode_execute_fence_ testing false
// afterwards (e.g. by moving out of it) - confirm against the fence
// implementation.
774+ context_->fences ().return_fence (encode_execute_fence_);
775+ }
776+ }
777+
771778void ComputeGraph::prepack () {
772779 int i = 0 ;
773780 bool submitted = false ;
@@ -793,7 +800,7 @@ void ComputeGraph::prepack() {
793800 submit_current_cmd (/* final_use=*/ true , /* wait=*/ false );
794801 }
795802 staging_nbytes_in_cmd_ = 0 ;
796- context_->set_cmd ();
803+ context_->set_cmd (/* reusable = */ true );
797804 submitted = true ;
798805 }
799806
@@ -806,30 +813,33 @@ void ComputeGraph::prepack() {
806813}
807814
// Post-patch behaviour (context lines plus '+' lines; '-' lines are removed):
//   1. wait on any fence left over from a previous encode_execute() call;
//   2. call context_->flush() and start a command buffer with
//      set_cmd(reusable = true), then reset the query pool;
//   3. encode every node in execute_nodes_, and after every 64th node submit
//      the current command buffer without waiting and start a new one;
//   4. obtain a fence, store it in encode_execute_fence_, and submit the
//      current command buffer to the GPU with that fence's submit handle.
// The fence is not waited on here; that happens in wait_on_encode_execute().
808815void ComputeGraph::encode_execute () {
816+ wait_on_encode_execute ();
809817 context_->flush ();
810818 context_->set_cmd (/* reusable = */ true );
811819
812820 context_->cmd_reset_querypool ();
// Counts nodes encoded so far; used only to decide when to split the
// command buffer below.
821+ uint32_t encoded_node_count = 0 ;
813822

814823 for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
815824 node->encode (this );
825+ encoded_node_count++;
// NOTE(review): 64 is an unexplained literal - consider a named constant
// documenting why this chunk size was chosen.
826+ if ((encoded_node_count % 64 ) == 0 ) {
827+ submit_current_cmd (/* final_use=*/ false , /* wait=*/ false );
// NOTE(review): same argument as the set_cmd call near the top of this
// function, but without the "reusable" annotation - annotate it for
// consistency.
828+ context_->set_cmd (true );
829+ }
816830 }
817831

// The three '-' lines below are the removed deferred-submission path: the
// flag that made execute() perform the first submission is gone.
818- // Indicate execute nodes have been freshly encoded and needs to be submitted
819- // first
820- execute_pending_first_submission = true ;
// NOTE(review): when execute_nodes_.size() is a non-zero multiple of 64, the
// command buffer submitted here was started inside the loop and has had no
// node encoded into it - presumably harmless, but confirm that
// submit_cmd_to_gpu() accepts that case and still signals the fence.
832+ encode_execute_fence_ = context_-> fences (). get_fence ();
833+ context_-> submit_cmd_to_gpu (
834+ encode_execute_fence_. get_submit_handle (), /* final_use= */ false ) ;
835}
822836
// Post-patch behaviour (context lines plus '+' lines; '-' lines are removed):
// wait on the fence from encode_execute() if one is outstanding, then obtain
// a fence, call submit_all_non_final_cmds() with its submit handle, wait on
// it, return it via context_->fences(), and increment execute_count_.
823837void ComputeGraph::execute () {
// Removed by this patch: the branch that treated the first execute() after
// encoding differently from later calls.
824- if (execute_pending_first_submission) {
825- submit_current_cmd (/* final_use=*/ false , /* wait=*/ true );
826- execute_pending_first_submission = false ;
827- } else {
828- vkapi::VulkanFence fence = context_->fences ().get_fence ();
829- context_->submit_all_non_final_cmds (fence.get_submit_handle ());
830- fence.wait ();
831- context_->fences ().return_fence (fence);
832- }
// NOTE(review): encode_execute() now submits the encoded command buffers to
// the GPU itself, and this path unconditionally submits all non-final
// command buffers again. The first execute() after encode_execute() therefore
// presumably runs the encoded work a second time - confirm this is intended.
838+ wait_on_encode_execute ();
839+ vkapi::VulkanFence fence = context_->fences ().get_fence ();
840+ context_->submit_all_non_final_cmds (fence.get_submit_handle ());
841+ fence.wait ();
842+ context_->fences ().return_fence (fence);
833843 execute_count_++;
834844}
835845
0 commit comments