2424#include  < cstdarg> 
2525#include  < filesystem> 
2626#include  < fstream> 
27+ #include  < list> 
2728#include  < regex> 
2829#include  < set> 
2930#include  < string> 
@@ -2375,15 +2376,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23752376                    }
23762377                    throw  std::invalid_argument (" unknown buffer type"  );
23772378                }
2378-                 params.tensor_buft_overrides .push_back ({strdup (tensor_name.c_str ()), buft_list.at (buffer_type)});
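2379+                 //  keep the strings alive without leaking strdup'd copies: store them in a function-local static std::list (not a vector), whose elements are never relocated, so the c_str() pointers handed out below stay valid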
2380+                 static  std::list<std::string> buft_overrides;
2381+                 buft_overrides.push_back (tensor_name);
2382+                 params.tensor_buft_overrides .push_back ({buft_overrides.back ().c_str (), buft_list.at (buffer_type)});
23792383            }
23802384        }
23812385    ));
23822386    add_opt (common_arg (
23832387        {" --cpu-moe"  , " -cmoe"  },
23842388        " keep all Mixture of Experts (MoE) weights in the CPU"  ,
23852389        [](common_params & params) {
2386-             params.tensor_buft_overrides .push_back ({strdup ( " \\ .ffn_(up|down|gate)_exps" ) , ggml_backend_cpu_buffer_type ()});
2390+             params.tensor_buft_overrides .push_back ({" \\ .ffn_(up|down|gate)_exps"  , ggml_backend_cpu_buffer_type ()});
23872391        }
23882392    ).set_env (" LLAMA_ARG_CPU_MOE"  ));
23892393    add_opt (common_arg (
@@ -2394,7 +2398,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23942398                throw  std::invalid_argument (" invalid value"  );
23952399            }
23962400            for  (int  i = 0 ; i < value; ++i) {
2397-                 params.tensor_buft_overrides .push_back ({strdup (string_format (" \\ .%d\\ .ffn_(up|down|gate)_exps"  , i).c_str ()), ggml_backend_cpu_buffer_type ()});
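2401+                 //  keep the strings alive without leaking strdup'd copies: store them in a function-local static std::list (not a vector), whose elements are never relocated, so the c_str() pointers handed out below stay valid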
2402+                 static  std::list<std::string> buft_overrides;
2403+                 buft_overrides.push_back (string_format (" blk\\ .%d\\ .ffn_(up|down|gate)_exps"  , i));
2404+                 params.tensor_buft_overrides .push_back ({buft_overrides.back ().c_str (), ggml_backend_cpu_buffer_type ()});
23982405            }
23992406        }
24002407    ).set_env (" LLAMA_ARG_N_CPU_MOE"  ));
0 commit comments