Skip to content

Commit a06ed5f

Browse files
authored
llama : add simple option to enable CPU for MoE weights (--cpu-moe) (#14992)
1 parent 7845240 commit a06ed5f

File tree

1 file changed

+9
-0
lines changed

common/arg.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2380,6 +2380,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23802380
}
23812381
}
23822382
));
2383+
add_opt(common_arg(
2384+
{"--cpu-moe"},
2385+
"use CPU for Mixture of Experts (MoE) weights",
2386+
[](common_params & params) {
2387+
params.tensor_buft_overrides.push_back({"\\.ffn_up_exps\\.weight$", ggml_backend_cpu_buffer_type()});
2388+
params.tensor_buft_overrides.push_back({"\\.ffn_down_exps\\.weight$", ggml_backend_cpu_buffer_type()});
2389+
params.tensor_buft_overrides.push_back({"\\.ffn_gate_exps\\.weight$", ggml_backend_cpu_buffer_type()});
2390+
}
2391+
).set_env("LLAMA_ARG_CPU_MOE"));
23832392
add_opt(common_arg(
23842393
{"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
23852394
"number of layers to store in VRAM",

0 commit comments

Comments
 (0)