@@ -1361,15 +1361,15 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
     std::vector<int32_t> ids;
     std::vector<ggml_bitset_t> used_ids;
 
-    for (int split_id = 0; split_id < sched->n_splits; split_id++) {
-        struct ggml_backend_sched_split * split = &splits[split_id];
+    for (int i = 0; i < sched->n_splits; i++) {
+        struct ggml_backend_sched_split * split = &splits[i];
         int split_backend_id = split->backend_id;
         ggml_backend_t split_backend = sched->backends[split_backend_id];
 
         // copy the input tensors to the split backend
-        for (int input_id = 0; input_id < split->n_inputs; input_id++) {
-            ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[input_id]);
-            struct ggml_tensor * input = split->inputs[input_id];
+        for (int j = 0; j < split->n_inputs; j++) {
+            ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
+            struct ggml_tensor * input = split->inputs[j];
             struct ggml_tensor * input_cpy = tensor_copy(input, split_backend_id, sched->cur_copy);
 
             if (input->flags & GGML_TENSOR_FLAG_INPUT) {
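
For orientation, the loops renamed in this hunk implement the scheduler's staging step: each split is pinned to one backend, and every input produced on a different backend is routed through a pre-allocated per-backend copy before the split's subgraph runs. Below is a minimal sketch of that pattern; Split, Tensor, and copy_for_backend are illustrative stand-ins, not ggml's actual internals.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy model of the split/input-copy pattern; not ggml's real structures.
    struct Tensor {
        std::string name;
        int backend_id; // backend whose buffer holds this tensor's data
    };

    struct Split {
        int backend_id;               // backend that executes this split
        std::vector<Tensor *> inputs; // inputs produced outside the split
    };

    // stand-in for tensor_copy(): the scheduler keeps a reusable copy of
    // each input on the backend that consumes it
    static Tensor copy_for_backend(const Tensor & t, int backend_id) {
        return Tensor{t.name + "_copy", backend_id};
    }

    int main() {
        Tensor a{"a", 0};     // produced on backend 0 (e.g. CPU)
        Split split{1, {&a}}; // executes on backend 1 (e.g. GPU)

        // same shape as the loop in ggml_backend_sched_compute_splits:
        // bring every foreign input onto the split's backend first
        for (Tensor * input : split.inputs) {
            Tensor cpy = copy_for_backend(*input, split.backend_id);
            printf("copy %s (backend %d) -> %s (backend %d)\n",
                   input->name.c_str(), input->backend_id,
                   cpy.name.c_str(), cpy.backend_id);
        }
        // the split's subgraph would then be computed on its backend
        return 0;
    }
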
@@ -1404,30 +1404,17 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
                 // get the ids
                 ggml_tensor * ids_tensor = node->src[2];
-                ggml_backend_t ids_backend = split_backend;
-
-                // if the ids tensor is also an input of the split, it may not have been copied yet to the split backend
-                // in that case, we use the original ids tensor
-                for (int i = input_id + 1; i < split->n_inputs; i++) {
-                    if (ids_tensor == tensor_copy(split->inputs[i], split_backend_id, sched->cur_copy)) {
-                        ids_tensor = split->inputs[i];
-                        ids_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[i]);
-                        break;
-                    }
-                }
-
                 if (ids_tensor != prev_ids_tensor) {
                     ids.resize(ggml_nbytes(ids_tensor) / sizeof(int32_t));
-                    ggml_backend_tensor_get_async(ids_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
-                    ggml_backend_synchronize(ids_backend);
+                    ggml_backend_tensor_get_async(split_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
+                    ggml_backend_synchronize(split_backend);
 
                     // find the used experts
                     used_ids.clear();
                     used_ids.resize(ggml_bitset_size(n_expert));
                     for (int64_t i1 = 0; i1 < ids_tensor->ne[1]; i1++) {
                         for (int64_t i0 = 0; i0 < ids_tensor->ne[0]; i0++) {
                             int32_t id = ids[i1 * ids_tensor->nb[1]/sizeof(int32_t) + i0 * ids_tensor->nb[0]/sizeof(int32_t)];
-                            GGML_ASSERT(id >= 0 && id < n_expert);
                             ggml_bitset_set(used_ids.data(), id);
                         }
                     }
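
Two details in the used-expert scan above are easy to miss: ggml's nb[] strides are in bytes, so they are divided by sizeof(int32_t) to index the downloaded ids as a flat int32 array, and used_ids packs one bit per expert id. The sketch below shows both pieces under those assumptions; the bitset helpers are local stand-ins mirroring the idea of ggml_bitset_* from ggml-impl.h, and the tensor extents are made up.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Local stand-ins for ggml's bitset helpers: one bit per expert id,
    // packed into 32-bit words.
    typedef uint32_t bitset_t;
    static size_t bitset_size(size_t n) { return (n + 31) / 32; }
    static void   bitset_set(bitset_t * b, size_t i) { b[i / 32] |= 1u << (i % 32); }
    static bool   bitset_get(const bitset_t * b, size_t i) { return (b[i / 32] >> (i % 32)) & 1; }

    int main() {
        // hypothetical ids tensor: ne0 experts chosen per token, ne1 tokens;
        // nb0/nb1 are byte strides, as in ggml's tensor layout
        const int64_t ne0 = 2, ne1 = 3;
        const size_t  nb0 = sizeof(int32_t);       // contiguous columns
        const size_t  nb1 = ne0 * sizeof(int32_t); // row stride in bytes

        const int32_t ids[ne0 * ne1] = {0, 5,  5, 2,  7, 0}; // expert ids per token
        const int n_expert = 8;

        std::vector<bitset_t> used_ids(bitset_size(n_expert), 0);
        for (int64_t i1 = 0; i1 < ne1; i1++) {
            for (int64_t i0 = 0; i0 < ne0; i0++) {
                // byte strides divided by the element size give int32 indices
                int32_t id = ids[i1*nb1/sizeof(int32_t) + i0*nb0/sizeof(int32_t)];
                bitset_set(used_ids.data(), id);
            }
        }

        for (int e = 0; e < n_expert; e++) {
            printf("expert %d used: %d\n", e, (int) bitset_get(used_ids.data(), e));
        }
        return 0;
    }

With these extents the scan marks experts 0, 2, 5, and 7, giving the scheduler the set of experts that actually need their weights on the split backend.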