@@ -2548,7 +2548,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
25482548 {" --cpu-moe" , " -cmoe" },
25492549 " keep all Mixture of Experts (MoE) weights in the CPU" ,
25502550 [](common_params & params) {
2551- params.tensor_buft_overrides .push_back ({ " \\ .ffn_(up|down|gate)_exps " , ggml_backend_cpu_buffer_type ()} );
2551+ params.tensor_buft_overrides .push_back (llm_ffn_exps_cpu_override () );
25522552 }
25532553 ).set_env (" LLAMA_ARG_CPU_MOE" ));
25542554 add_opt (common_arg (
@@ -2561,7 +2561,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
25612561 for (int i = 0 ; i < value; ++i) {
25622562 // keep strings alive and avoid leaking memory by storing them in a static vector
25632563 static std::list<std::string> buft_overrides;
2564- buft_overrides.push_back (string_format ( " blk \\ .%d \\ .ffn_(up|down|gate)_exps " , i));
2564+ buft_overrides.push_back (llm_ffn_exps_block_regex ( i));
25652565 params.tensor_buft_overrides .push_back ({buft_overrides.back ().c_str (), ggml_backend_cpu_buffer_type ()});
25662566 }
25672567 }
@@ -2570,7 +2570,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
25702570 {" --cpu-moe-draft" , " -cmoed" },
25712571 " keep all Mixture of Experts (MoE) weights in the CPU for the draft model" ,
25722572 [](common_params & params) {
2573- params.speculative .tensor_buft_overrides .push_back ({ " \\ .ffn_(up|down|gate)_exps " , ggml_backend_cpu_buffer_type ()} );
2573+ params.speculative .tensor_buft_overrides .push_back (llm_ffn_exps_cpu_override () );
25742574 }
25752575 ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env (" LLAMA_ARG_CPU_MOE_DRAFT" ));
25762576 add_opt (common_arg (
@@ -2582,7 +2582,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
25822582 }
25832583 for (int i = 0 ; i < value; ++i) {
25842584 static std::list<std::string> buft_overrides_draft;
2585- buft_overrides_draft.push_back (string_format ( " blk \\ .%d \\ .ffn_(up|down|gate)_exps " , i));
2585+ buft_overrides_draft.push_back (llm_ffn_exps_block_regex ( i));
25862586 params.speculative .tensor_buft_overrides .push_back ({buft_overrides_draft.back ().c_str (), ggml_backend_cpu_buffer_type ()});
25872587 }
25882588 }
0 commit comments