1 file changed: +4 −2 lines changed
@@ -14556,10 +14556,12 @@ static int llama_decode_internal(
     ggml_set_cached_graph(lctx.sched,lctx.cached_graph.is_active);

     // Disable future graph caching in presence of env var,
-    // if there are multiple devices, or if batch size is greater than 1
+    // if there are multiple devices, if batch size is greater than 1,
+    // or if nsplits is not 2.
     // TO DO enable graph caching for these cases
     bool disable_cached_ggml_graph = (getenv("GGML_DISABLE_GRAPH_CACHING") != nullptr)
-        || (llama_get_device_count(model) > 1);
+        || (llama_get_device_count(model) > 1)
+        || (ggml_backend_sched_get_n_splits(lctx.sched) != 2);
     for (int i = 0 ; i < gf->n_nodes; i++) {
         if (gf->nodes[i]->op == GGML_OP_ADD && gf->nodes[i]->src[1] && gf->nodes[i]->src[1]->ne[1] > 1) {
             disable_cached_ggml_graph = true;
You can’t perform that action at this time.
0 commit comments