@@ -647,6 +647,7 @@ struct ggml_backend_sched {
     // pipeline parallelism support
     int n_copies;
     int cur_copy;
+    int next_copy;
     ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
     struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
     int n_graph_inputs;
@@ -1433,8 +1434,6 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
         }
     }

-    sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
-
     return GGML_STATUS_SUCCESS;
 }

@@ -1535,10 +1534,10 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
 bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
     GGML_ASSERT((int)sched->hash_set.size >= measure_graph->n_nodes + measure_graph->n_leafs);

-    ggml_backend_sched_split_graph(sched, measure_graph);
-
     ggml_backend_sched_synchronize(sched);

+    ggml_backend_sched_split_graph(sched, measure_graph);
+
     if (!ggml_gallocr_reserve_n(sched->galloc, &sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
         return false;
     }
@@ -1550,6 +1549,10 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *

 bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
     GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
+    GGML_ASSERT(!sched->is_alloc);
+
+    sched->cur_copy = sched->next_copy;
+    sched->next_copy = (sched->next_copy + 1) % sched->n_copies;

     ggml_backend_sched_split_graph(sched, graph);

@@ -1590,7 +1593,7 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
         // if the graph is not already allocated, always use copy 0 after a synchronization
         // this ensures that during generation the same copy is used every time,
         // which avoids changes in the graph that could cause CUDA or other graphs to be disabled
-        sched->cur_copy = 0;
+        sched->next_copy = 0;
     }
 }

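Taken together, the hunks move the copy rotation for pipeline parallelism out of ggml_backend_sched_compute_splits and into ggml_backend_sched_alloc_graph: the copy to use is now chosen when a graph is allocated, and ggml_backend_sched_synchronize resets only next_copy, leaving the copy of an already-allocated graph untouched. The sketch below is a minimal, self-contained model of that rotation only; toy_sched and its functions are hypothetical stand-ins for illustration, not the ggml API.

```c
// Minimal sketch of the copy-rotation scheme introduced by this diff.
// toy_sched and the toy_* functions are hypothetical; they only model
// when cur_copy/next_copy change, not the real scheduler.
#include <stdio.h>

#define TOY_MAX_COPIES 4

struct toy_sched {
    int n_copies;   // number of graph copies used for pipeline parallelism
    int cur_copy;   // copy used by the currently allocated graph
    int next_copy;  // copy that the next allocation will use
    int is_alloc;   // whether a graph is currently allocated
};

// Mirrors the new behavior of ggml_backend_sched_alloc_graph:
// the copy index advances here, not after computing the splits.
static void toy_alloc_graph(struct toy_sched * s) {
    s->cur_copy  = s->next_copy;
    s->next_copy = (s->next_copy + 1) % s->n_copies;
    s->is_alloc  = 1;
}

// Computing no longer touches the copy index at all.
static void toy_compute(struct toy_sched * s) {
    printf("compute on copy %d\n", s->cur_copy);
    s->is_alloc = 0; // pretend the graph is consumed/reset afterwards
}

// Mirrors the synchronize change: with no graph allocated, the rotation
// falls back to copy 0 so repeated single-graph use stays on one copy.
static void toy_synchronize(struct toy_sched * s) {
    if (!s->is_alloc) {
        s->next_copy = 0;
    }
}

int main(void) {
    struct toy_sched s = { .n_copies = TOY_MAX_COPIES };

    // Pipelined use: back-to-back allocations rotate copies 0, 1, 2, ...
    for (int i = 0; i < 3; i++) {
        toy_alloc_graph(&s);
        toy_compute(&s);
    }

    // After a synchronize with nothing allocated, the rotation restarts at 0,
    // so token-by-token generation keeps reusing the same copy.
    toy_synchronize(&s);
    toy_alloc_graph(&s);
    toy_compute(&s); // prints "compute on copy 0"
    return 0;
}
```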