@@ -5630,6 +5630,48 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod
         } else {
             compute_ctx = ctx->compute_ctx.lock();
         }
+    } else {
+        switch (node->op) {
+        case GGML_OP_REPEAT:
+        case GGML_OP_ACC:
+        case GGML_OP_GET_ROWS:
+        case GGML_OP_ADD:
+        case GGML_OP_MUL:
+        case GGML_OP_DIV:
+        case GGML_OP_CONCAT:
+        case GGML_OP_UPSCALE:
+        case GGML_OP_SCALE:
+        case GGML_OP_SQR:
+        case GGML_OP_SIN:
+        case GGML_OP_COS:
+        case GGML_OP_CLAMP:
+        case GGML_OP_PAD:
+        case GGML_OP_CPY:
+        case GGML_OP_CONT:
+        case GGML_OP_DUP:
+        case GGML_OP_NORM:
+        case GGML_OP_GROUP_NORM:
+        case GGML_OP_RMS_NORM:
+        case GGML_OP_UNARY:
+        case GGML_OP_DIAG_MASK_INF:
+        case GGML_OP_SOFT_MAX:
+        case GGML_OP_ROPE:
+        case GGML_OP_ARGSORT:
+        case GGML_OP_SUM_ROWS:
+        case GGML_OP_IM2COL:
+        case GGML_OP_TIMESTEP_EMBEDDING:
+        case GGML_OP_POOL_2D:
+        case GGML_OP_LEAKY_RELU:
+            {
+                // These operations all go through ggml_vk_op_f32, so short-circuit and
+                // do the only thing needed for the dryrun.
+                vk_pipeline pipeline = ggml_vk_op_get_pipeline(ctx, src0, src1, src2, node, node->op);
+                ggml_pipeline_request_descriptor_sets(ctx->device, pipeline, 1);
+                return false;
+            }
+        default:
+            break;
+        }
     }
 
     switch (node->op) {
@@ -6359,16 +6401,17 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
     bool first_node_in_batch = true; // true if next node will be first node in a batch
     int submit_node_idx = 0; // index to first node in a batch
 
-    // submit work every submit_count node to overlap CPU cmdbuffer generation with GPU execution
-    constexpr int submit_count = 100;
+    // Submit work every nodes_per_submit nodes to overlap CPU cmdbuffer generation with GPU execution.
+    // Start with a smaller count to get work submitted right away, and increase it after each submit.
+    int nodes_per_submit = 20;
     int submitted_nodes = 0;
+    int submit_count = 0;
     for (int i = 0; i < cgraph->n_nodes; i++) {
         if (first_node_in_batch) {
             submit_node_idx = i;
         }
 
-        bool submit = (submitted_nodes >= submit_count) || (i == last_node);
-
+        bool submit = (submitted_nodes >= nodes_per_submit) || (i == last_node);
 
         bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, submit);
 
@@ -6385,6 +6428,15 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg
         if (submit) {
             first_node_in_batch = true;
             submitted_nodes = 0;
+            switch (submit_count) {
+            case 0:
+                nodes_per_submit = 50;
+                break;
+            default:
+                nodes_per_submit = 100;
+                break;
+            }
+            submit_count++;
         }
     }
 
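
As an aside, here is a minimal, standalone sketch (not part of the patch) of the submit-batching schedule the change introduces: the first batch is flushed after 20 nodes, the next after 50, and every batch after that at 100, so the GPU can start executing early while the CPU keeps recording command buffers. The graph size and the per-node counting are illustrative assumptions; in the real loop, submitted_nodes only advances for nodes that actually enqueue work.

#include <cstdio>

int main() {
    // Hypothetical graph size, just to show the resulting batch sizes.
    const int n_nodes   = 500;
    const int last_node = n_nodes - 1;

    int nodes_per_submit = 20;  // small first batch so the GPU starts early
    int submitted_nodes  = 0;
    int submit_count     = 0;
    int batch_start      = 0;

    for (int i = 0; i < n_nodes; i++) {
        submitted_nodes++;
        const bool submit = (submitted_nodes >= nodes_per_submit) || (i == last_node);
        if (submit) {
            printf("submit %d: nodes %d..%d (%d nodes)\n",
                   submit_count, batch_start, i, i - batch_start + 1);
            batch_start     = i + 1;
            submitted_nodes = 0;
            // Grow the batch size after each submit: 20 -> 50 -> 100 -> 100 ...
            nodes_per_submit = (submit_count == 0) ? 50 : 100;
            submit_count++;
        }
    }
    return 0;
}

With the assumed 500-node graph this prints batches of 20, 50, 100, 100, 100, 100 and a final 30, which is the intended front-loading of small submits before settling at the old batch size of 100.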