Skip to content

Commit a7c7ccb

Browse files
committed
better way to avoid memory leaks in tensor_buft_overrides
adding a destructor to common_params would cause issues when the object is copied
1 parent 260e030 commit a7c7ccb

File tree

3 files changed

+10
-11
lines changed

3 files changed

+10
-11
lines changed

common/arg.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <cstdarg>
2525
#include <filesystem>
2626
#include <fstream>
27+
#include <list>
2728
#include <regex>
2829
#include <set>
2930
#include <string>
@@ -2375,15 +2376,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23752376
}
23762377
throw std::invalid_argument("unknown buffer type");
23772378
}
2378-
params.tensor_buft_overrides.push_back({strdup(tensor_name.c_str()), buft_list.at(buffer_type)});
2379+
// keep strings alive and avoid leaking memory by storing them in a static list
2380+
static std::list<std::string> buft_overrides;
2381+
buft_overrides.push_back(tensor_name);
2382+
params.tensor_buft_overrides.push_back({buft_overrides.back().c_str(), buft_list.at(buffer_type)});
23792383
}
23802384
}
23812385
));
23822386
add_opt(common_arg(
23832387
{"--cpu-moe", "-cmoe"},
23842388
"keep all Mixture of Experts (MoE) weights in the CPU",
23852389
[](common_params & params) {
2386-
params.tensor_buft_overrides.push_back({strdup("\\.ffn_(up|down|gate)_exps"), ggml_backend_cpu_buffer_type()});
2390+
params.tensor_buft_overrides.push_back({"\\.ffn_(up|down|gate)_exps", ggml_backend_cpu_buffer_type()});
23872391
}
23882392
).set_env("LLAMA_ARG_CPU_MOE"));
23892393
add_opt(common_arg(
@@ -2394,7 +2398,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23942398
throw std::invalid_argument("invalid value");
23952399
}
23962400
for (int i = 0; i < value; ++i) {
2397-
params.tensor_buft_overrides.push_back({strdup(string_format("\\.%d\\.ffn_(up|down|gate)_exps", i).c_str()), ggml_backend_cpu_buffer_type()});
2401+
// keep strings alive and avoid leaking memory by storing them in a static list
2402+
static std::list<std::string> buft_overrides;
2403+
buft_overrides.push_back(string_format("\\.%d\\.ffn_(up|down|gate)_exps", i));
2404+
params.tensor_buft_overrides.push_back({buft_overrides.back().c_str(), ggml_backend_cpu_buffer_type()});
23982405
}
23992406
}
24002407
).set_env("LLAMA_ARG_N_CPU_MOE"));

common/common.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,9 +1565,3 @@ ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std
15651565

15661566
return result;
15671567
}
1568-
1569-
common_params::~common_params() {
1570-
for (auto & ot : tensor_buft_overrides) {
1571-
free(const_cast<char *>(ot.pattern));
1572-
}
1573-
}

common/common.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,6 @@ enum common_reasoning_format {
241241
};
242242

243243
struct common_params {
244-
~common_params();
245-
246244
int32_t n_predict = -1; // new tokens to predict
247245
int32_t n_ctx = 4096; // context size
248246
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)

0 commit comments

Comments
 (0)