Skip to content

Commit 8833f22

Browse files
committed
llama : add simple option to enable CPU for MoE weights (--cpu-moe)
1 parent e08a988 commit 8833f22

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

common/arg.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2373,6 +2373,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23732373
}
23742374
}
23752375
));
2376+
// Registers the "--cpu-moe" command-line flag. It takes no value argument:
// the handler below receives only `params`. When invoked, the handler
// appends three entries to params.tensor_buft_overrides, each pairing a
// tensor-name pattern with the buffer type returned by
// ggml_backend_cpu_buffer_type().
// NOTE(review): set_env("LLAMA_ARG_CPU_MOE") presumably allows enabling the
// flag through that environment variable — confirm against common_arg.
add_opt(common_arg(
2377+
{"--cpu-moe"},
2378+
"use CPU for Mixture of Experts (MoE) weights",
2379+
[](common_params & params) {
2380+
// One override per expert FFN projection (up, down, gate), pushed in that
// order. The patterns look like regular expressions ("\\." is an escaped
// dot, "$" anchors at the end of the name) — presumably matched against
// tensor names by the model loader; verify there.
params.tensor_buft_overrides.push_back({"\\.ffn_up_exps\\.weight$", ggml_backend_cpu_buffer_type()});
2381+
params.tensor_buft_overrides.push_back({"\\.ffn_down_exps\\.weight$", ggml_backend_cpu_buffer_type()});
2382+
params.tensor_buft_overrides.push_back({"\\.ffn_gate_exps\\.weight$", ggml_backend_cpu_buffer_type()});
2383+
}
2384+
).set_env("LLAMA_ARG_CPU_MOE"));
23762385
add_opt(common_arg(
23772386
{"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
23782387
"number of layers to store in VRAM",

0 commit comments

Comments
 (0)