Skip to content

Commit 7304640

Browse files
committed
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/build.yml # .github/workflows/release.yml # docs/android.md # docs/backend/hexagon/CMakeUserPresets.json # examples/llama.android/app/src/main/res/layout/activity_main.xml # examples/llama.android/app/src/main/res/layout/item_message_assistant.xml # examples/llama.android/app/src/main/res/layout/item_message_user.xml # examples/model-conversion/scripts/causal/run-org-model.py # examples/model-conversion/scripts/utils/common.py # ggml/CMakeLists.txt # ggml/src/ggml-hexagon/CMakeLists.txt # ggml/src/ggml-hexagon/htp/CMakeLists.txt # ggml/src/ggml-hexagon/htp/matmul-ops.c # tests/test-arg-parser.cpp # tools/server/README.md
2 parents 714ab06 + 74e0513 commit 7304640

File tree

23 files changed

+1252
-560
lines changed

23 files changed

+1252
-560
lines changed

common/arg.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,11 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
774774
}
775775
auto opt = *arg_to_options[arg];
776776
std::string val;
777+
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
778+
// bool arg (need to reverse the meaning for negative args)
779+
bool is_neg = std::find(opt.args_neg.begin(), opt.args_neg.end(), arg) != opt.args_neg.end();
780+
val = is_neg ? "0" : "1";
781+
}
777782
if (opt.value_hint != nullptr) {
778783
// arg with single value
779784
check_arg(i);
@@ -1141,15 +1146,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
11411146
}
11421147
).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
11431148
add_opt(common_arg(
1144-
{"--cache-ram", "-cram"}, "N",
1149+
{"-cram", "--cache-ram"}, "N",
11451150
string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)"
11461151
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)", params.cache_ram_mib),
11471152
[](common_params & params, int value) {
11481153
params.cache_ram_mib = value;
11491154
}
11501155
).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
11511156
add_opt(common_arg(
1152-
{"--kv-unified", "-kvu"},
1157+
{"-kvu", "--kv-unified"},
11531158
"use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)",
11541159
[](common_params & params) {
11551160
params.kv_unified = true;
@@ -1417,7 +1422,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
14171422
}
14181423
).set_sparam());
14191424
add_opt(common_arg(
1420-
{"--sampling-seq", "--sampler-seq"}, "SEQUENCE",
1425+
{"--sampler-seq", "--sampling-seq"}, "SEQUENCE",
14211426
string_format("simplified sequence for samplers that will be used (default: %s)", sampler_type_chars.c_str()),
14221427
[](common_params & params, const std::string & value) {
14231428
params.sampling.samplers = common_sampler_types_from_chars(value);
@@ -2075,26 +2080,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20752080
}
20762081
));
20772082
add_opt(common_arg(
2078-
{"--override-tensor", "-ot"}, "<tensor name pattern>=<buffer type>,...",
2083+
{"-ot", "--override-tensor"}, "<tensor name pattern>=<buffer type>,...",
20792084
"override tensor buffer type", [](common_params & params, const std::string & value) {
20802085
parse_tensor_buffer_overrides(value, params.tensor_buft_overrides);
20812086
}
20822087
));
20832088
add_opt(common_arg(
2084-
{"--override-tensor-draft", "-otd"}, "<tensor name pattern>=<buffer type>,...",
2089+
{"-otd", "--override-tensor-draft"}, "<tensor name pattern>=<buffer type>,...",
20852090
"override tensor buffer type for draft model", [](common_params & params, const std::string & value) {
20862091
parse_tensor_buffer_overrides(value, params.speculative.tensor_buft_overrides);
20872092
}
20882093
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
20892094
add_opt(common_arg(
2090-
{"--cpu-moe", "-cmoe"},
2095+
{"-cmoe", "--cpu-moe"},
20912096
"keep all Mixture of Experts (MoE) weights in the CPU",
20922097
[](common_params & params) {
20932098
params.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
20942099
}
20952100
).set_env("LLAMA_ARG_CPU_MOE"));
20962101
add_opt(common_arg(
2097-
{"--n-cpu-moe", "-ncmoe"}, "N",
2102+
{"-ncmoe", "--n-cpu-moe"}, "N",
20982103
"keep the Mixture of Experts (MoE) weights of the first N layers in the CPU",
20992104
[](common_params & params, int value) {
21002105
if (value < 0) {
@@ -2109,14 +2114,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
21092114
}
21102115
).set_env("LLAMA_ARG_N_CPU_MOE"));
21112116
add_opt(common_arg(
2112-
{"--cpu-moe-draft", "-cmoed"},
2117+
{"-cmoed", "--cpu-moe-draft"},
21132118
"keep all Mixture of Experts (MoE) weights in the CPU for the draft model",
21142119
[](common_params & params) {
21152120
params.speculative.tensor_buft_overrides.push_back(llm_ffn_exps_cpu_override());
21162121
}
21172122
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_CPU_MOE_DRAFT"));
21182123
add_opt(common_arg(
2119-
{"--n-cpu-moe-draft", "-ncmoed"}, "N",
2124+
{"-ncmoed", "--n-cpu-moe-draft"}, "N",
21202125
"keep the Mixture of Experts (MoE) weights of the first N layers in the CPU for the draft model",
21212126
[](common_params & params, int value) {
21222127
if (value < 0) {
@@ -2644,7 +2649,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
26442649
}
26452650
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS"));
26462651
add_opt(common_arg(
2647-
{"--reranking", "--rerank"},
2652+
{"--rerank", "--reranking"},
26482653
string_format("enable reranking endpoint on server (default: %s)", "disabled"),
26492654
[](common_params & params) {
26502655
params.embedding = true;
@@ -3115,7 +3120,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
31153120
}
31163121
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
31173122
add_opt(common_arg(
3118-
{"--draft-max", "--draft", "--draft-n"}, "N",
3123+
{"--draft", "--draft-n", "--draft-max"}, "N",
31193124
string_format("number of tokens to draft for speculative decoding (default: %d)", params.speculative.n_max),
31203125
[](common_params & params, int value) {
31213126
params.speculative.n_max = value;

common/preset.cpp

Lines changed: 187 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "preset.h"
33
#include "peg-parser.h"
44
#include "log.h"
5+
#include "download.h"
56

67
#include <fstream>
78
#include <sstream>
@@ -15,9 +16,13 @@ static std::string rm_leading_dashes(const std::string & str) {
1516
return str.substr(pos);
1617
}
1718

18-
std::vector<std::string> common_preset::to_args() const {
19+
std::vector<std::string> common_preset::to_args(const std::string & bin_path) const {
1920
std::vector<std::string> args;
2021

22+
if (!bin_path.empty()) {
23+
args.push_back(bin_path);
24+
}
25+
2126
for (const auto & [opt, value] : options) {
2227
args.push_back(opt.args.back()); // use the last arg as the main arg
2328
if (opt.value_hint == nullptr && opt.value_hint_2 == nullptr) {
@@ -63,6 +68,52 @@ std::string common_preset::to_ini() const {
6368
return ss.str();
6469
}
6570

71+
void common_preset::set_option(const common_preset_context & ctx, const std::string & env, const std::string & value) {
72+
// try if option exists, update it
73+
for (auto & [opt, val] : options) {
74+
if (opt.env && env == opt.env) {
75+
val = value;
76+
return;
77+
}
78+
}
79+
// if option does not exist, we need to add it
80+
if (ctx.key_to_opt.find(env) == ctx.key_to_opt.end()) {
81+
throw std::runtime_error(string_format(
82+
"%s: option with env '%s' not found in ctx_params",
83+
__func__, env.c_str()
84+
));
85+
}
86+
options[ctx.key_to_opt.at(env)] = value;
87+
}
88+
89+
void common_preset::unset_option(const std::string & env) {
90+
for (auto it = options.begin(); it != options.end(); ) {
91+
const common_arg & opt = it->first;
92+
if (opt.env && env == opt.env) {
93+
it = options.erase(it);
94+
return;
95+
} else {
96+
++it;
97+
}
98+
}
99+
}
100+
101+
bool common_preset::get_option(const std::string & env, std::string & value) const {
102+
for (const auto & [opt, val] : options) {
103+
if (opt.env && env == opt.env) {
104+
value = val;
105+
return true;
106+
}
107+
}
108+
return false;
109+
}
110+
111+
void common_preset::merge(const common_preset & other) {
112+
for (const auto & [opt, val] : other.options) {
113+
options[opt] = val; // overwrite existing options
114+
}
115+
}
116+
66117
static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_file(const std::string & path) {
67118
std::map<std::string, std::map<std::string, std::string>> parsed;
68119

@@ -172,9 +223,12 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke
172223
return value;
173224
}
174225

175-
common_presets common_presets_load(const std::string & path, common_params_context & ctx_params) {
// Build the full CLI-option table for the given example type, then index it
// by env-variable key so presets can resolve options quickly (see set_option).
common_preset_context::common_preset_context(llama_example ex)
    : ctx_params(common_params_parser_init(default_params, ex)),
      key_to_opt(get_map_key_opt(ctx_params)) {}
229+
230+
common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const {
176231
common_presets out;
177-
auto key_to_opt = get_map_key_opt(ctx_params);
178232
auto ini_data = parse_ini_from_file(path);
179233

180234
for (auto section : ini_data) {
@@ -188,7 +242,7 @@ common_presets common_presets_load(const std::string & path, common_params_conte
188242
for (const auto & [key, value] : section.second) {
189243
LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str());
190244
if (key_to_opt.find(key) != key_to_opt.end()) {
191-
auto & opt = key_to_opt[key];
245+
const auto & opt = key_to_opt.at(key);
192246
if (is_bool_arg(opt)) {
193247
preset.options[opt] = parse_bool_arg(opt, key, value);
194248
} else {
@@ -199,8 +253,137 @@ common_presets common_presets_load(const std::string & path, common_params_conte
199253
// TODO: maybe warn about unknown key?
200254
}
201255
}
256+
257+
if (preset.name == "*") {
258+
// handle global preset
259+
global = preset;
260+
} else {
261+
out[preset.name] = preset;
262+
}
263+
}
264+
265+
return out;
266+
}
267+
268+
common_presets common_preset_context::load_from_cache() const {
269+
common_presets out;
270+
271+
auto cached_models = common_list_cached_models();
272+
for (const auto & model : cached_models) {
273+
common_preset preset;
274+
preset.name = model.to_string();
275+
preset.set_option(*this, "LLAMA_ARG_HF_REPO", model.to_string());
202276
out[preset.name] = preset;
203277
}
204278

205279
return out;
206280
}
281+
282+
// A GGUF model discovered on disk: display name, path to the main model file
// (or its first shard), and an optional multimodal projector path
// (empty string when the model has none).
struct local_model {
    std::string name;
    std::string path;
    std::string path_mmproj;
};
287+
288+
common_presets common_preset_context::load_from_models_dir(const std::string & models_dir) const {
289+
if (!std::filesystem::exists(models_dir) || !std::filesystem::is_directory(models_dir)) {
290+
throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", models_dir.c_str()));
291+
}
292+
293+
std::vector<local_model> models;
294+
auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
295+
auto files = fs_list(subdir_path, false);
296+
common_file_info model_file;
297+
common_file_info first_shard_file;
298+
common_file_info mmproj_file;
299+
for (const auto & file : files) {
300+
if (string_ends_with(file.name, ".gguf")) {
301+
if (file.name.find("mmproj") != std::string::npos) {
302+
mmproj_file = file;
303+
} else if (file.name.find("-00001-of-") != std::string::npos) {
304+
first_shard_file = file;
305+
} else {
306+
model_file = file;
307+
}
308+
}
309+
}
310+
// single file model
311+
local_model model{
312+
/* name */ name,
313+
/* path */ first_shard_file.path.empty() ? model_file.path : first_shard_file.path,
314+
/* path_mmproj */ mmproj_file.path // can be empty
315+
};
316+
if (!model.path.empty()) {
317+
models.push_back(model);
318+
}
319+
};
320+
321+
auto files = fs_list(models_dir, true);
322+
for (const auto & file : files) {
323+
if (file.is_dir) {
324+
scan_subdir(file.path, file.name);
325+
} else if (string_ends_with(file.name, ".gguf")) {
326+
// single file model
327+
std::string name = file.name;
328+
string_replace_all(name, ".gguf", "");
329+
local_model model{
330+
/* name */ name,
331+
/* path */ file.path,
332+
/* path_mmproj */ ""
333+
};
334+
models.push_back(model);
335+
}
336+
}
337+
338+
// convert local models to presets
339+
common_presets out;
340+
for (const auto & model : models) {
341+
common_preset preset;
342+
preset.name = model.name;
343+
preset.set_option(*this, "LLAMA_ARG_MODEL", model.path);
344+
if (!model.path_mmproj.empty()) {
345+
preset.set_option(*this, "LLAMA_ARG_MMPROJ", model.path_mmproj);
346+
}
347+
out[preset.name] = preset;
348+
}
349+
350+
return out;
351+
}
352+
353+
common_preset common_preset_context::load_from_args(int argc, char ** argv) const {
354+
common_preset preset;
355+
preset.name = COMMON_PRESET_DEFAULT_NAME;
356+
357+
bool ok = common_params_to_map(argc, argv, ctx_params.ex, preset.options);
358+
if (!ok) {
359+
throw std::runtime_error("failed to parse CLI arguments into preset");
360+
}
361+
362+
return preset;
363+
}
364+
365+
common_presets common_preset_context::cascade(const common_presets & base, const common_presets & added) const {
366+
common_presets out = base; // copy
367+
for (const auto & [name, preset_added] : added) {
368+
if (out.find(name) != out.end()) {
369+
// if exists, merge
370+
common_preset & target = out[name];
371+
target.merge(preset_added);
372+
} else {
373+
// otherwise, add directly
374+
out[name] = preset_added;
375+
}
376+
}
377+
return out;
378+
}
379+
380+
// Use `base` as a template: each entry of `presets` is applied on top of a
// fresh copy of `base`, keeping the per-preset name and letting the preset's
// own values override the template's.
common_presets common_preset_context::cascade(const common_preset & base, const common_presets & presets) const {
    common_presets out;
    for (const auto & [name, overlay] : presets) {
        common_preset combined = base; // copy of the template
        combined.name = name;
        combined.merge(overlay);
        out[name] = std::move(combined);
    }
    return out;
}

0 commit comments

Comments
 (0)