 
 using json = nlohmann::ordered_json;
 
+std::initializer_list<enum llama_example> mmproj_examples = {
+    LLAMA_EXAMPLE_LLAVA,
+    // TODO: add LLAMA_EXAMPLE_SERVER when it's ready
+};
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = std::move(examples);
     return *this;
@@ -642,11 +647,16 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
 // utils
 //
 
-static void common_params_handle_model(
+struct handle_model_result {
+    bool found_mmproj = false;
+    common_params_model mmproj;
+};
+
+static handle_model_result common_params_handle_model(
         struct common_params_model & model,
         const std::string & bearer_token,
-        const std::string & model_path_default,
-        bool is_mmproj = false) { // TODO: move is_mmproj to an enum when we have more files?
+        const std::string & model_path_default) {
+    handle_model_result result;
     // handle pre-fill default model path and url based on hf_repo and hf_file
     {
         if (!model.hf_repo.empty()) {
@@ -658,7 +668,12 @@ static void common_params_handle_model(
                         exit(1); // built without CURL, error message already printed
                     }
                     model.hf_repo = auto_detected.repo;
-                    model.hf_file = is_mmproj ? auto_detected.mmprojFile : auto_detected.ggufFile;
+                    model.hf_file = auto_detected.ggufFile;
+                    if (!auto_detected.mmprojFile.empty()) {
+                        result.found_mmproj  = true;
+                        result.mmproj.hf_repo = model.hf_repo;
+                        result.mmproj.hf_file = auto_detected.mmprojFile;
+                    }
                 } else {
                     model.hf_file = model.path;
                 }
@@ -695,6 +710,8 @@ static void common_params_handle_model(
             exit(1);
         }
     }
+
+    return result;
 }
 
 const std::vector<ggml_type> kv_cache_types = {
@@ -828,16 +845,25 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
     }
 
-    common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
-    common_params_handle_model(params.speculative.model, params.hf_token, "");
-    common_params_handle_model(params.vocoder.model, params.hf_token, "");
-
-    // allow --mmproj to be set from -hf
-    // assuming that mmproj is always in the same repo as text model
-    if (!params.model.hf_repo.empty() && ctx_arg.ex == LLAMA_EXAMPLE_LLAVA) {
-        params.mmproj.hf_repo = params.model.hf_repo;
+    // handle model and download
+    {
+        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
+        if (params.no_mmproj) {
+            params.mmproj = {};
+        } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
+            // optionally, handle mmproj model when -hf is specified
+            params.mmproj = res.mmproj;
+        }
+        // only download mmproj if the current example is using it
+        for (auto & ex : mmproj_examples) {
+            if (ctx_arg.ex == ex) {
+                common_params_handle_model(params.mmproj, params.hf_token, "");
+                break;
+            }
+        }
+        common_params_handle_model(params.speculative.model, params.hf_token, "");
+        common_params_handle_model(params.vocoder.model, params.hf_token, "");
     }
-    common_params_handle_model(params.mmproj, params.hf_token, "", true);
 
     if (params.escape) {
         string_process_escapes(params.prompt);
@@ -969,7 +995,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
         "llama-embedding",
         "llama-eval-callback",
         "llama-export-lora",
-        "llama-gbnf-validator",
         "llama-gen-docs",
         "llama-gguf",
         "llama-gguf-hash",
@@ -989,7 +1014,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
         "llama-perplexity",
         "llama-q8dot",
         "llama-quantize",
-        "llama-quantize-stats",
         "llama-qwen2vl-cli",
         "llama-retrieval",
         "llama-run",
@@ -2096,18 +2120,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file for LLaVA. see examples/llava/README.md",
+        "path to a multimodal projector file. see examples/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file for LLaVA. see examples/llava/README.md",
+        "URL to a multimodal projector file. see examples/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA}));
+    ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj"},
+        "explicitly disable multimodal projector, useful when using -hf",
+        [](common_params & params) {
+            params.no_mmproj = true;
+        }
+    ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj-offload"},
+        "do not offload multimodal projector to GPU",
+        [](common_params & params) {
+            params.mmproj_use_gpu = false;
+        }
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--image"}, "FILE",
         "path to an image file. use with multimodal models. Specify multiple times for batching",
@@ -2382,6 +2420,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
         "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
+        "mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n"
         "example: unsloth/phi-4-GGUF:q4_k_m\n"
         "(default: unused)",
         [](common_params & params, const std::string & value) {
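
For context, a minimal standalone sketch of the mmproj resolution order implied by the diff above: --no-mmproj clears everything, an explicit --mmproj/--mmproj-url is kept, otherwise an mmproj auto-detected from the -hf repo is adopted. The types model_ref and detect_result and the helper resolve_mmproj are stand-ins for illustration only (they are not llama.cpp types), and the repo/file names are hypothetical.

    // Standalone sketch, not part of the diff: models the mmproj resolution
    // order introduced above using simplified stand-in types.
    #include <cstdio>
    #include <string>

    struct model_ref { std::string hf_repo, hf_file, path, url; };

    struct detect_result { bool found_mmproj = false; model_ref mmproj; };

    static model_ref resolve_mmproj(bool no_mmproj, model_ref explicit_mmproj, detect_result res) {
        if (no_mmproj) {
            return {};              // user opted out entirely
        }
        if (!explicit_mmproj.path.empty() || !explicit_mmproj.url.empty()) {
            return explicit_mmproj; // explicit --mmproj/--mmproj-url takes priority
        }
        if (res.found_mmproj) {
            return res.mmproj;      // inherit repo/file auto-detected via -hf
        }
        return {};
    }

    int main() {
        detect_result res;
        res.found_mmproj   = true;
        res.mmproj.hf_repo = "ggml-org/example-GGUF";  // hypothetical repo
        res.mmproj.hf_file = "mmproj-model-f16.gguf";  // hypothetical file

        model_ref chosen = resolve_mmproj(/*no_mmproj=*/false, /*explicit_mmproj=*/{}, res);
        std::printf("mmproj: %s/%s\n", chosen.hf_repo.c_str(), chosen.hf_file.c_str());
        return 0;
    }

In the actual change, the download of the resolved mmproj additionally happens only when the current example appears in mmproj_examples.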