Commit eec1e33

vulkan: allow graph_optimize for prompt processing workloads (#17475)
1 parent 879d673 commit eec1e33

File tree

1 file changed (+0, -18 lines changed)


ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 0 additions & 18 deletions
@@ -13158,24 +13158,6 @@ static void ggml_vk_graph_optimize(ggml_backend_t backend, struct ggml_cgraph *
         return false;
     };
 
-    // This function tries to reorder the graph to allow nodes to run in parallel.
-    // This helps with small batches, but for large batches its a slowdown, probably
-    // due to cache contention. So only reorder if the majority of nodes have few rows.
-    int num_small_nodes = 0;
-    int num_counted_nodes = 0;
-    for (int i = 0; i < graph->n_nodes; ++i) {
-        if (!is_empty(graph->nodes[i]) &&
-            graph->nodes[i]->op != GGML_OP_SET_ROWS) {
-            if (ggml_nrows(graph->nodes[i]) <= 8) {
-                num_small_nodes++;
-            }
-            num_counted_nodes++;
-        }
-    }
-    if (num_small_nodes < num_counted_nodes / 2) {
-        return;
-    }
-
     std::vector<ggml_tensor *> new_order;
     std::vector<bool> used(graph->n_nodes, false);
     std::set<ggml_tensor *> used_node_set;
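For context, the deleted block was a gate in front of the Vulkan backend's graph-reordering pass: count the non-trivial nodes, and only reorder when most of them have at most 8 rows (a small-batch, token-generation style workload). The sketch below restates that gate as a standalone C++ function; the Node struct and its field names are simplified stand-ins for the real ggml structures (ggml_tensor, ggml_nrows, GGML_OP_SET_ROWS), purely for illustration and not the actual backend code.

    #include <vector>

    // Simplified stand-in for a graph node; the real code inspects ggml_tensor
    // via ggml_nrows(), an is_empty() helper, and the GGML_OP_SET_ROWS op.
    struct Node {
        long long rows;      // stand-in for ggml_nrows(node)
        bool      empty;     // stand-in for is_empty(node)
        bool      set_rows;  // stand-in for node->op == GGML_OP_SET_ROWS
    };

    // Mirrors the heuristic the commit deletes: reorder only when the majority
    // of counted nodes are "small" (<= 8 rows), i.e. a small-batch workload.
    static bool should_reorder(const std::vector<Node> & nodes) {
        int num_small_nodes   = 0;
        int num_counted_nodes = 0;
        for (const Node & n : nodes) {
            if (!n.empty && !n.set_rows) {
                if (n.rows <= 8) {
                    num_small_nodes++;
                }
                num_counted_nodes++;
            }
        }
        // The removed code returned early (skipping the reorder) when
        // num_small_nodes < num_counted_nodes / 2.
        return num_small_nodes >= num_counted_nodes / 2;
    }

With that gate removed, ggml_vk_graph_optimize applies its reordering to large-batch (prompt processing) graphs as well, which is what the commit title refers to.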
