Skip to content

Commit a7c7ccb

Browse files
committed
better way to avoid memory leaks in tensor_buft_overrides
adding a destructor to common_params would cause issues when the object is copied
1 parent 260e030 commit a7c7ccb

File tree

3 files changed

+10
-11
lines changed

3 files changed

+10
-11
lines changed

common/arg.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <cstdarg>
2525
#include <filesystem>
2626
#include <fstream>
27+
#include <list>
2728
#include <regex>
2829
#include <set>
2930
#include <string>
@@ -2375,15 +2376,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23752376
}
23762377
throw std::invalid_argument("unknown buffer type");
23772378
}
2378-
params.tensor_buft_overrides.push_back({strdup(tensor_name.c_str()), buft_list.at(buffer_type)});
2379+
// keep strings alive and avoid leaking memory by storing them in a static list
2380+
static std::list<std::string> buft_overrides;
2381+
buft_overrides.push_back(tensor_name);
2382+
params.tensor_buft_overrides.push_back({buft_overrides.back().c_str(), buft_list.at(buffer_type)});
23792383
}
23802384
}
23812385
));
23822386
add_opt(common_arg(
23832387
{"--cpu-moe", "-cmoe"},
23842388
"keep all Mixture of Experts (MoE) weights in the CPU",
23852389
[](common_params & params) {
2386-
params.tensor_buft_overrides.push_back({strdup("\\.ffn_(up|down|gate)_exps"), ggml_backend_cpu_buffer_type()});
2390+
params.tensor_buft_overrides.push_back({"\\.ffn_(up|down|gate)_exps", ggml_backend_cpu_buffer_type()});
23872391
}
23882392
).set_env("LLAMA_ARG_CPU_MOE"));
23892393
add_opt(common_arg(
@@ -2394,7 +2398,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
23942398
throw std::invalid_argument("invalid value");
23952399
}
23962400
for (int i = 0; i < value; ++i) {
2397-
params.tensor_buft_overrides.push_back({strdup(string_format("\\.%d\\.ffn_(up|down|gate)_exps", i).c_str()), ggml_backend_cpu_buffer_type()});
2401+
// keep strings alive and avoid leaking memory by storing them in a static list
2402+
static std::list<std::string> buft_overrides;
2403+
buft_overrides.push_back(string_format("\\.%d\\.ffn_(up|down|gate)_exps", i));
2404+
params.tensor_buft_overrides.push_back({buft_overrides.back().c_str(), ggml_backend_cpu_buffer_type()});
23982405
}
23992406
}
24002407
).set_env("LLAMA_ARG_N_CPU_MOE"));

common/common.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,9 +1565,3 @@ ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std
15651565

15661566
return result;
15671567
}
1568-
1569-
common_params::~common_params() {
1570-
for (auto & ot : tensor_buft_overrides) {
1571-
free(const_cast<char *>(ot.pattern));
1572-
}
1573-
}

common/common.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,6 @@ enum common_reasoning_format {
241241
};
242242

243243
struct common_params {
244-
~common_params();
245-
246244
int32_t n_predict = -1; // new tokens to predict
247245
int32_t n_ctx = 4096; // context size
248246
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)

0 commit comments

Comments
 (0)