Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ggml/src/ggml-backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,11 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
}
}
}
// if the node is still unassigned, assign it to the first backend that supports it
for (int b = 0; b < sched->n_backends && *cur_backend_id == -1; b++) {
ggml_backend_sched_set_if_supported(sched, node, b, cur_backend_id);
}
GGML_ASSERT(*cur_backend_id != -1);
}

// pass 5: split graph, find tensors that need to be copied
Expand Down Expand Up @@ -1098,7 +1103,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg

const int node_backend_id = tensor_backend_id(node);

assert(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
GGML_ASSERT(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback

// check if we should start a new split based on the sources of the current node
bool need_new_split = false;
Expand Down Expand Up @@ -1156,7 +1161,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg

size_t src_id = hash_id(src);
const int src_backend_id = sched->hv_tensor_backend_ids[src_id];
assert(src_backend_id != -1); // all inputs should be assigned by now
GGML_ASSERT(src_backend_id != -1); // all inputs should be assigned by now

if (src->flags & GGML_TENSOR_FLAG_INPUT && sched->n_copies > 1) {
if (tensor_id_copy(src_id, src_backend_id, 0) == NULL) {
Expand Down
37 changes: 17 additions & 20 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

// ggml-backend interface

std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
static std::vector<ggml_backend_buffer_type_t> bufts = []() {
std::vector<ggml_backend_buffer_type_t> bufts;

Expand All @@ -57,23 +57,27 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
}
#endif

bufts.push_back(NULL);

return bufts;
}();

return bufts;
}

static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
return ggml_backend_cpu_get_extra_buffers_type().data();
static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
bufts.push_back(nullptr);
return bufts;
}();

return extra_bufts.data();

GGML_UNUSED(device);
}

static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
for (auto * extra : ggml_backend_cpu_get_extra_buffers_type()) {
if (extra && extra == buft) {
for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
if (extra == buft) {
return true;
}
}
Expand Down Expand Up @@ -397,20 +401,13 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
return true;
}

// extra_buffer_op?
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
if (extra) {
auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
if (buf_extra && buf_extra->supports_op(dev, op)) {
return true;
}
}
}

// the other case need host buffer.
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
return false;
// check extra buffer types
// note: only the first sources are checked for extra buffer types to reduce overhead, increase if necessary
for (int i = 0; i < 4; i++) {
if (op->src[i] && op->src[i]->buffer &&
ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
return buf_extra->supports_op(dev, op);
}
}

Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-cpu/traits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ extra_buffer_type::~extra_buffer_type() {}
} // namespace ggml::cpu

bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
for (auto extra : ggml_backend_cpu_get_extra_buffer_types()) {
if (extra && extra->context) {
auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
auto tensor_traits = buf_extra->get_tensor_traits(op);
Expand All @@ -23,7 +23,7 @@ bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct
}

bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size) {
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
for (auto extra : ggml_backend_cpu_get_extra_buffer_types()) {
if (extra && extra->context) {
auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
auto tensor_traits = buf_extra->get_tensor_traits(op);
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ class extra_buffer_type {
} // namespace ggml::cpu

// implemented in ggml-cpu.cpp.
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types();

#endif
Loading