
Commit 09f9994

llama : do not crash if there is no CPU backend
1 parent 15e0328 commit 09f9994

File tree

src/llama-adapter.cpp
src/llama-model-loader.cpp
src/llama-model.cpp

3 files changed: 23 additions & 0 deletions


src/llama-adapter.cpp

Lines changed: 6 additions & 0 deletions
@@ -253,6 +253,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     std::vector<ggml_backend_buffer_type_t> buft_extra;
     {
         auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+        if (!cpu_dev) {
+            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        }
         auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);

         auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
@@ -291,6 +294,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
                 LLAMA_LOG_WARN("%s: lora for '%s' cannot use buft '%s', fallback to CPU\n", __func__, model_tensor->name, ggml_backend_buft_name(buft));

                 auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+                if (!cpu_dev) {
+                    throw std::runtime_error(format("%s: no CPU backend found", __func__));
+                }
                 buft = ggml_backend_dev_buffer_type(cpu_dev);

                 break;
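
Every hunk in this commit applies the same guard: look up the CPU device and throw a descriptive error instead of dereferencing a null pointer later when no CPU backend is compiled in. A minimal sketch of that pattern, factored into a standalone helper (require_cpu_dev is hypothetical and not part of the commit):

// Sketch of the guard pattern this commit applies at each call site.
// require_cpu_dev() is a hypothetical helper, not code from the commit.
#include <stdexcept>
#include <string>

#include "ggml-backend.h"

static ggml_backend_dev_t require_cpu_dev(const char * func) {
    // returns NULL if no CPU backend was registered at build/load time
    ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    if (!cpu_dev) {
        // fail loudly, mirroring the commit's error message
        throw std::runtime_error(std::string(func) + ": no CPU backend found");
    }
    return cpu_dev;
}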

src/llama-model-loader.cpp

Lines changed: 4 additions & 0 deletions
@@ -823,6 +823,10 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
     mmaps_used.reserve(files.size());
     for (const auto & file : files) {
         auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
+        if (!reg) {
+            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        }
+
         auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
         std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
        mmaps_used.emplace_back(mapping->size(), 0);
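
The check matters here because the registry pointer is used immediately to resolve a function pointer: without it, a missing CPU backend would crash inside ggml_backend_reg_get_proc_address or the subsequent is_numa_fn() call. A self-contained sketch of that lookup chain (cpu_is_numa is a hypothetical wrapper; the sketch also checks the device before fetching its registry, which the commit does not need to do at this call site):

// Sketch of the NUMA-query lookup chain guarded by this hunk.
#include <stdexcept>

#include "ggml-backend.h"
#include "ggml-cpu.h" // declares ggml_is_numa()

static bool cpu_is_numa() {
    auto * dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    auto * reg = dev ? ggml_backend_dev_backend_reg(dev) : nullptr;
    if (!reg) {
        throw std::runtime_error("no CPU backend found");
    }
    // resolve the backend-specific entry point through the registry
    auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
    return is_numa_fn && is_numa_fn();
}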

src/llama-model.cpp

Lines changed: 13 additions & 0 deletions
@@ -299,6 +299,10 @@ static buft_list_t make_cpu_buft_list(const std::vector<ggml_backend_dev_t> & de
     // add extra buffer types, only if no GPU device is present
     // ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
     auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+    if (cpu_dev == nullptr) {
+        throw std::runtime_error(format("%s: no CPU backend found", __func__));
+    }
+
     auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
     auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)
         ggml_backend_reg_get_proc_address(cpu_reg, "ggml_backend_dev_get_extra_bufts");
@@ -1484,6 +1488,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     }

     ggml_backend_dev_t cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+    if (cpu_dev == nullptr) {
+        throw std::runtime_error(format("%s: no CPU backend found", __func__));
+    }
     const int i_gpu_start = std::max((int) hparams.n_layer - n_gpu_layers, (int) 0);
     const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
@@ -1672,6 +1679,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             auto * buft_dev = ggml_backend_buft_get_device(buft);
             if (ml.use_mmap && buft_dev && buft == ggml_backend_dev_host_buffer_type(buft_dev)) {
                 auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+                if (!cpu_dev) {
+                    throw std::runtime_error("no CPU backend found");
+                }
                 buft = ggml_backend_dev_buffer_type(cpu_dev);
             }

@@ -4122,6 +4132,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         if (!dev) {
             // FIXME: workaround for CPU backend buft having a NULL device
             dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
+            if (!dev) {
+                throw std::runtime_error(format("%s: no CPU backend found", __func__));
+            }
         }
         ggml_backend_dev_props props;
         ggml_backend_dev_get_props(dev, &props);
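
In this last hunk, a buffer type whose device is NULL (the FIXME'd CPU-backend case) falls back to the CPU device before its properties are read; without the new check, ggml_backend_dev_get_props(NULL, ...) would crash. A self-contained sketch of that scenario (max_buffer_size and the use of memory_total are hypothetical, not taken from the commit):

// Sketch: resolve a buffer type's device, falling back to the CPU device
// when it is NULL, and fail cleanly if there is no CPU backend either.
#include <stdexcept>

#include "ggml-backend.h"

static size_t max_buffer_size(ggml_backend_buffer_type_t buft) {
    ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft);
    if (!dev) {
        // CPU backend buft may report a NULL device (see the FIXME above)
        dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
        if (!dev) {
            throw std::runtime_error("no CPU backend found");
        }
    }
    ggml_backend_dev_props props;
    ggml_backend_dev_get_props(dev, &props);
    return props.memory_total; // hypothetical use of the queried properties
}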
