You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
// copy a bit extra to ensure there are no NaNs in the padding
1429
+
// copy a bit extra at the end to ensure there are no NaNs in the padding of the last expert
1430
+
// this is necessary for MMQ in the CUDA backend
1425
1431
expert_size_copy + padding_end);
1426
1432
};
1427
1433
1428
-
for (++it; it != unique_ids.end(); ++it) {
1429
-
const int32_t id = *it;
1434
+
int id = 0;
1435
+
while (!ggml_bitset_get(used_ids.data(), id)) {
1436
+
id++;
1437
+
}
1438
+
int32_t first_id = id;
1439
+
int32_t last_id = first_id;
1440
+
1441
+
for (++id; id < n_expert; ++id) {
1442
+
if (!ggml_bitset_get(used_ids.data(), id)) {
1443
+
continue;
1444
+
}
1430
1445
1431
1446
if (id == last_id + 1) {
1432
1447
last_id = id;
@@ -1439,19 +1454,18 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
1439
1454
last_id = id;
1440
1455
}
1441
1456
copy_experts(first_id, last_id);
1442
-
} else
1443
-
#endif
1444
-
1445
-
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
1446
-
// TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
1447
-
if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
1448
-
ggml_backend_synchronize(input_backend);
1449
-
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
1459
+
// TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
0 commit comments