 
 using json = nlohmann::ordered_json;
 
+std::initializer_list<enum llama_example> mmproj_examples = {
+    LLAMA_EXAMPLE_LLAVA,
+    LLAMA_EXAMPLE_SERVER,
+};
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = std::move(examples);
     return *this;
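The shared `mmproj_examples` list is what lets the mmproj-related options and the download step further down be gated on the same set of examples. A minimal sketch, not part of the patch, of the membership check it enables (mirroring the loop added in `common_params_parse_ex` below):

```cpp
// Sketch only: returns true when the given example is one of the
// mmproj-capable ones declared in mmproj_examples above.
static bool example_uses_mmproj(enum llama_example ex) {
    for (auto e : mmproj_examples) {
        if (e == ex) {
            return true;
        }
    }
    return false;
}
```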
@@ -641,11 +646,16 @@ static struct common_hf_file_res common_get_hf_file(const std::string &, const s
 // utils
 //
 
-static void common_params_handle_model(
+struct handle_model_result {
+    bool found_mmproj = false;
+    common_params_model mmproj;
+};
+
+static handle_model_result common_params_handle_model(
         struct common_params_model & model,
         const std::string & bearer_token,
-        const std::string & model_path_default,
-        bool is_mmproj = false) { // TODO: move is_mmproj to an enum when we have more files?
+        const std::string & model_path_default) {
+    handle_model_result result;
     // handle pre-fill default model path and url based on hf_repo and hf_file
     {
         if (!model.hf_repo.empty()) {
@@ -657,7 +667,12 @@ static void common_params_handle_model(
                         exit(1); // built without CURL, error message already printed
                     }
                     model.hf_repo = auto_detected.repo;
-                    model.hf_file = is_mmproj ? auto_detected.mmprojFile : auto_detected.ggufFile;
+                    model.hf_file = auto_detected.ggufFile;
+                    if (!auto_detected.mmprojFile.empty()) {
+                        result.found_mmproj = true;
+                        result.mmproj.hf_repo = model.hf_repo;
+                        result.mmproj.hf_file = auto_detected.mmprojFile;
+                    }
                 } else {
                     model.hf_file = model.path;
                 }
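For context, the fields read from `auto_detected` above imply the following shape for the result of `common_get_hf_file` (named in the hunk header). This is a sketch inferred from usage; the real declaration lives alongside `common_get_hf_file` and may differ:

```cpp
#include <string>

// Inferred from the usage above (auto_detected.repo / .ggufFile / .mmprojFile).
struct common_hf_file_res {
    std::string repo;       // resolved <user>/<model> repository
    std::string ggufFile;   // GGUF file selected for the requested quant
    std::string mmprojFile; // multimodal projector file, empty when absent
};
```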
@@ -694,6 +709,8 @@ static void common_params_handle_model(
             exit(1);
         }
     }
+
+    return result;
 }
 
 const std::vector<ggml_type> kv_cache_types = {
@@ -827,18 +844,25 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
     }
 
-    common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
-    common_params_handle_model(params.speculative.model, params.hf_token, "");
-    common_params_handle_model(params.vocoder.model, params.hf_token, "");
-
-    // allow --mmproj to be set from -hf
-    // assuming that mmproj is always in the same repo as text model
-    if (!params.model.hf_repo.empty() && (
-        ctx_arg.ex == LLAMA_EXAMPLE_LLAVA || ctx_arg.ex == LLAMA_EXAMPLE_SERVER)) {
-        params.mmproj.hf_repo = params.model.hf_repo;
+    // handle model and download
+    {
+        auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
+        if (params.no_mmproj) {
+            params.mmproj = {};
+        } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
+            // optionally, handle mmproj model when -hf is specified
+            params.mmproj = res.mmproj;
+        }
+        // only download mmproj if the current example is using it
+        for (auto & ex : mmproj_examples) {
+            if (ctx_arg.ex == ex) {
+                common_params_handle_model(params.mmproj, params.hf_token, "");
+                break;
+            }
+        }
+        common_params_handle_model(params.speculative.model, params.hf_token, "");
+        common_params_handle_model(params.vocoder.model, params.hf_token, "");
     }
-    // TODO @ngxson : this will break non-vision model with -hf, need to fix before merging
-    common_params_handle_model(params.mmproj, params.hf_token, "", true);
 
     if (params.escape) {
         string_process_escapes(params.prompt);
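The block above relies on a few `common_params` members that are only visible indirectly in this diff. An excerpt sketch, for illustration only; the real struct in common.h has many more fields and its defaults may differ:

```cpp
// Excerpt of the common_params members used above and by the new options
// further down in this diff (assumed declarations).
struct common_params /* excerpt */ {
    common_params_model model;                   // main text model
    common_params_model mmproj;                  // multimodal projector (path / url / hf_repo / hf_file)
    bool                no_mmproj      = false;  // set by --no-mmproj
    bool                mmproj_use_gpu = true;   // cleared by --no-mmproj-offload
};
```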
@@ -970,7 +994,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
970994 " llama-embedding" ,
971995 " llama-eval-callback" ,
972996 " llama-export-lora" ,
973- " llama-gbnf-validator" ,
974997 " llama-gen-docs" ,
975998 " llama-gguf" ,
976999 " llama-gguf-hash" ,
@@ -990,7 +1013,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
9901013 " llama-perplexity" ,
9911014 " llama-q8dot" ,
9921015 " llama-quantize" ,
993- " llama-quantize-stats" ,
9941016 " llama-qwen2vl-cli" ,
9951017 " llama-retrieval" ,
9961018 " llama-run" ,
@@ -2097,18 +2119,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file for LLaVA. see examples/llava/README.md",
+        "path to a multimodal projector file. see examples/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file for LLaVA. see examples/llava/README.md",
+        "URL to a multimodal projector file. see examples/llava/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_LLAVA, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj"},
+        "explicitly disable multimodal projector, useful when using -hf",
+        [](common_params & params) {
+            params.no_mmproj = true;
+        }
+    ).set_examples(mmproj_examples));
+    add_opt(common_arg(
+        {"--no-mmproj-offload"},
+        "do not offload multimodal projector to GPU",
+        [](common_params & params) {
+            params.mmproj_use_gpu = false;
+        }
+    ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--image"}, "FILE",
         "path to an image file. use with multimodal models. Specify multiple times for batching",
@@ -2383,6 +2419,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"-hf", "-hfr", "--hf-repo"}, "<user>/<model>[:quant]",
         "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n"
+        "mmproj is also downloaded automatically if available. to disable, add --no-mmproj\n"
         "example: unsloth/phi-4-GGUF:q4_k_m\n"
         "(default: unused)",
         [](common_params & params, const std::string & value) {
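The handler body is truncated in this excerpt. As a rough sketch of the `<user>/<model>[:quant]` syntax described in the help text above (an assumption for illustration, not the actual handler), the optional quant tag could be separated like this:

```cpp
#include <string>
#include <utility>

// Sketch only: split "<user>/<model>[:quant]" into repository and quant tag,
// falling back to Q4_K_M (the documented default) when no tag is given.
static std::pair<std::string, std::string> split_hf_repo_tag(const std::string & value) {
    const std::string::size_type pos = value.find(':');
    if (pos == std::string::npos) {
        return {value, "Q4_K_M"};
    }
    return {value.substr(0, pos), value.substr(pos + 1)};
}
```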