
Commit 74c0e68

Merge branch 'concedo_experimental' into no_mmvq_change
2 parents: 91ca5ab + 6b6597e

File tree

26 files changed: +432, -846 lines


common/arg.cpp

Lines changed: 58 additions & 19 deletions
@@ -39,6 +39,11 @@
 
 using json = nlohmann::ordered_json;
 
+std::initializer_list<enum llama_example> mmproj_examples = {
+    LLAMA_EXAMPLE_LLAVA,
+    // TODO: add LLAMA_EXAMPLE_SERVER when it's ready
+};
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = std::move(examples);
     return *this;
@@ -642,11 +647,16 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
 // utils
 //
 
-static void common_params_handle_model(
+struct handle_model_result {
+    bool found_mmproj = false;
+    common_params_model mmproj;
+};
+
+static handle_model_result common_params_handle_model(
         struct common_params_model & model,
         const std::string & bearer_token,
-        const std::string & model_path_default,
-        bool is_mmproj = false) { // TODO: move is_mmproj to an enum when we have more files?
+        const std::string & model_path_default) {
+    handle_model_result result;
     // handle pre-fill default model path and url based on hf_repo and hf_file
     {
         if (!model.hf_repo.empty()) {
@@ -658,7 +668,12 @@ static void common_params_handle_model(
                 exit(1); // built without CURL, error message already printed
             }
             model.hf_repo = auto_detected.repo;
-            model.hf_file = is_mmproj ? auto_detected.mmprojFile : auto_detected.ggufFile;
+            model.hf_file = auto_detected.ggufFile;
+            if (!auto_detected.mmprojFile.empty()) {
+                result.found_mmproj   = true;
+                result.mmproj.hf_repo = model.hf_repo;
+                result.mmproj.hf_file = auto_detected.mmprojFile;
+            }
         } else {
             model.hf_file = model.path;
         }
@@ -695,6 +710,8 @@ static void common_params_handle_model(
             exit(1);
         }
     }
+
+    return result;
 }
 
 const std::vector<ggml_type> kv_cache_types = {
@@ -828,16 +845,25 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
     }
 
-    common_params_handle_model(params.model,             params.hf_token, DEFAULT_MODEL_PATH);
-    common_params_handle_model(params.speculative.model, params.hf_token, "");
-    common_params_handle_model(params.vocoder.model,     params.hf_token, "");
-
-    // allow --mmproj to be set from -hf
-    // assuming that mmproj is always in the same repo as text model
-    if (!params.model.hf_repo.empty() && ctx_arg.ex == LLAMA_EXAMPLE_LLAVA) {
-        params.mmproj.hf_repo = params.model.hf_repo;
+    // handle model and download
+    {
+        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
+        if (params.no_mmproj) {
+            params.mmproj = {};
+        } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
+            // optionally, handle mmproj model when -hf is specified
+            params.mmproj = res.mmproj;
+        }
+        // only download mmproj if the current example is using it
+        for (auto & ex : mmproj_examples) {
+            if (ctx_arg.ex == ex) {
+                common_params_handle_model(params.mmproj, params.hf_token, "");
+                break;
+            }
+        }
+        common_params_handle_model(params.speculative.model, params.hf_token, "");
+        common_params_handle_model(params.vocoder.model,     params.hf_token, "");
     }
-    common_params_handle_model(params.mmproj, params.hf_token, "", true);
 
     if (params.escape) {
         string_process_escapes(params.prompt);
@@ -969,7 +995,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
         "llama-embedding",
         "llama-eval-callback",
         "llama-export-lora",
-        "llama-gbnf-validator",
         "llama-gen-docs",
         "llama-gguf",
         "llama-gguf-hash",
@@ -989,7 +1014,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
         "llama-perplexity",
         "llama-q8dot",
         "llama-quantize",
-        "llama-quantize-stats",
         "llama-qwen2vl-cli",
         "llama-retrieval",
         "llama-run",
@@ -2096,18 +2120,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file for LLaVA. see examples/llava/README.md",
+        "path to a multimodal projector file. see examples/llava/README.md",
         [](common_params & params, const std::string & value) {
            params.mmproj.path = value;
        }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file for LLaVA. see examples/llava/README.md",
+        "URL to a multimodal projector file. see examples/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+    ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj"},
+        "explicitly disable multimodal projector, useful when using -hf",
+        [](common_params & params) {
+            params.no_mmproj = true;
+        }
+    ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj-offload"},
+        "do not offload multimodal projector to GPU",
+        [](common_params & params) {
+            params.mmproj_use_gpu = false;
+        }
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--image"}, "FILE",
         "path to an image file. use with multimodal models. Specify multiple times for batching",
@@ -2382,6 +2420,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
         "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
+        "mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n"
        "example: unsloth/phi-4-GGUF:q4_k_m\n"
         "(default: unused)",
         [](common_params & params, const std::string & value) {
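Taken together, the common/arg.cpp changes drop the old is_mmproj flag in favor of a handle_model_result: resolving the text model from -hf can now also report a companion mmproj file found in the same repo, and the parser afterwards decides whether to adopt and download it. Below is a minimal self-contained sketch of that decision flow; model_ref and resolve_model are simplified stand-ins for the real common_params machinery, not the actual implementation.

    #include <iostream>
    #include <string>
    #include <vector>

    // Simplified stand-ins for the real common_params types (illustrative only).
    struct model_ref { std::string hf_repo, hf_file, path, url; };

    struct handle_model_result {
        bool      found_mmproj = false;
        model_ref mmproj;
    };

    enum example_id { EXAMPLE_LLAVA, EXAMPLE_SERVER, EXAMPLE_MAIN };
    static const std::vector<example_id> mmproj_examples = { EXAMPLE_LLAVA };

    // Stub for common_params_handle_model(): pretend -hf auto-detection found
    // both a text model and a companion mmproj file in the same repo.
    static handle_model_result resolve_model(model_ref & model) {
        handle_model_result res;
        if (!model.hf_repo.empty() && model.hf_file.empty()) {
            model.hf_file      = "model-Q4_K_M.gguf";
            res.found_mmproj   = true;
            res.mmproj.hf_repo = model.hf_repo;
            res.mmproj.hf_file = "mmproj-model.gguf";
        }
        return res;
    }

    int main() {
        model_ref model { /*hf_repo=*/"user/model-GGUF" };
        model_ref mmproj;             // empty: neither --mmproj nor --mmproj-url given
        bool no_mmproj = false;       // set by --no-mmproj
        example_id ex  = EXAMPLE_LLAVA;

        handle_model_result res = resolve_model(model);
        if (no_mmproj) {
            mmproj = {};              // explicitly disabled
        } else if (res.found_mmproj && mmproj.path.empty() && mmproj.url.empty()) {
            mmproj = res.mmproj;      // adopt the auto-detected projector
        }
        // only resolve (download) mmproj for examples that actually use it
        for (auto e : mmproj_examples) {
            if (ex == e) { resolve_model(mmproj); break; }
        }
        std::cout << "mmproj: " << mmproj.hf_repo << "/" << mmproj.hf_file << "\n";
    }

Note the precedence this encodes: --no-mmproj wins over everything, an explicit --mmproj path or --mmproj-url wins over auto-detection, and the mmproj download only happens for examples listed in mmproj_examples.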

common/common.h

Lines changed: 2 additions & 0 deletions
@@ -338,6 +338,8 @@ struct common_params {
 
     // multimodal models (see examples/llava)
     struct common_params_model mmproj;
+    bool mmproj_use_gpu = true; // use GPU for multimodal model
+    bool no_mmproj = false;     // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
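The two new common_params fields give every example one place to read multimodal policy: --no-mmproj sets no_mmproj and --no-mmproj-offload clears mmproj_use_gpu. A hedged sketch of how a consumer might honor them follows; load_projector and the projector type are hypothetical stand-ins, not the actual clip/llava API.

    #include <optional>
    #include <string>

    struct common_params_model { std::string path, url, hf_repo, hf_file; };

    struct common_params {
        common_params_model mmproj;
        bool mmproj_use_gpu = true;  // use GPU for multimodal model
        bool no_mmproj      = false; // explicitly disable multimodal model
    };

    // Hypothetical loader (assumption: the real projector API differs).
    struct projector { std::string path; bool on_gpu; };

    static std::optional<projector> load_projector(const common_params & params) {
        if (params.no_mmproj || params.mmproj.path.empty()) {
            return std::nullopt; // run text-only
        }
        // --no-mmproj-offload flips mmproj_use_gpu to false, keeping the
        // projector on the CPU while the text model may still be offloaded.
        return projector { params.mmproj.path, params.mmproj_use_gpu };
    }

    int main() {
        common_params params;
        params.mmproj.path    = "mmproj-model.gguf";
        params.mmproj_use_gpu = false; // as if --no-mmproj-offload was passed
        auto proj = load_projector(params);
        return proj && !proj->on_gpu ? 0 : 1;
    }

Keeping the projector on the CPU while still offloading the text model can be useful when VRAM is tight.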

docs/multimodal/gemma3.md

Lines changed: 0 additions & 51 deletions
This file was deleted.

docs/multimodal/glmedge.md

Lines changed: 0 additions & 43 deletions
This file was deleted.
