Skip to content

Commit 630e9a6

Browse files
slaren authored and Nexesenex committed
llama : add simple option to enable CPU for MoE weights (--cpu-moe) (ggml-org#14992)
1 parent 58f0509 commit 630e9a6

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

common/arg.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2379,6 +2379,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ));
+    add_opt(common_arg(
+        {"--cpu-moe"},
+        "use CPU for Mixture of Experts (MoE) weights",
+        [](common_params & params) {
+            params.tensor_buft_overrides.push_back({"\\.ffn_up_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_down_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_gate_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+        }
+    ).set_env("LLAMA_ARG_CPU_MOE"));
     add_opt(common_arg(
         {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
         "number of layers to store in VRAM",

0 commit comments

Comments (0)