@@ -68,6 +68,29 @@ struct llama_control_vector_load_info;
 int32_t cpu_get_num_physical_cores();
 int32_t cpu_get_num_math();
 
+enum llama_example {
+    LLAMA_EXAMPLE_COMMON,
+    LLAMA_EXAMPLE_SPECULATIVE,
+    LLAMA_EXAMPLE_MAIN,
+    LLAMA_EXAMPLE_EMBEDDING,
+    LLAMA_EXAMPLE_PERPLEXITY,
+    LLAMA_EXAMPLE_RETRIEVAL,
+    LLAMA_EXAMPLE_PASSKEY,
+    LLAMA_EXAMPLE_IMATRIX,
+    LLAMA_EXAMPLE_BENCH,
+    LLAMA_EXAMPLE_SERVER,
+    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
+    LLAMA_EXAMPLE_EXPORT_LORA,
+    LLAMA_EXAMPLE_MTMD,
+    LLAMA_EXAMPLE_LOOKUP,
+    LLAMA_EXAMPLE_PARALLEL,
+    LLAMA_EXAMPLE_TTS,
+    LLAMA_EXAMPLE_DIFFUSION,
+    LLAMA_EXAMPLE_FINETUNE,
+
+    LLAMA_EXAMPLE_COUNT,
+};
+
 //
 // CLI argument parsing
 //
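
A minimal sketch (not part of the patch, assuming only the enum above) of how such a per-example tag and the trailing LLAMA_EXAMPLE_COUNT sentinel are typically used in CLI argument parsing: an option records which examples accept it, and raw integers can be range-checked before casting. The cli_option struct and helper names here are illustrative, not the patch's actual types.

#include <set>
#include <string>

// Illustrative option type: ties a flag to the examples that accept it.
struct cli_option {
    std::string             flag;     // e.g. "--mmproj"
    std::set<llama_example> examples; // which example programs accept the flag

    bool applies_to(llama_example ex) const {
        return examples.count(ex) > 0;
    }
};

// The sentinel makes validating a raw integer trivial before casting it.
static bool llama_example_is_valid(int value) {
    return value >= 0 && value < LLAMA_EXAMPLE_COUNT;
}
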
@@ -86,6 +109,14 @@ enum common_reasoning_format {
     COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
 };
 
+struct model_paths {
+    std::string path        = ""; // model local path      // NOLINT
+    std::string url         = ""; // model url to download // NOLINT
+    std::string hf_repo     = ""; // HF repo               // NOLINT
+    std::string hf_file     = ""; // HF file               // NOLINT
+    std::string docker_repo = ""; // Docker repo           // NOLINT
+};
+
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
 
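
A hedged usage sketch for model_paths, assuming a convention (not stated in the patch) that a caller fills exactly one source and the loader picks the first non-empty field; the helper name is hypothetical.

#include <string>

// Hypothetical helper: describe which source a model_paths points at.
static std::string model_source_desc(const model_paths & p) {
    if (!p.path.empty())        return "local: "  + p.path;
    if (!p.url.empty())         return "url: "    + p.url;
    if (!p.hf_repo.empty())     return "hf: "     + p.hf_repo + "/" + p.hf_file;
    if (!p.docker_repo.empty()) return "docker: " + p.docker_repo;
    return "unset";
}
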
@@ -230,8 +261,10 @@ struct gpt_params {
     std::string cache_type_k_draft = ""; // KV cache data type for K for the draft model
     std::string cache_type_v_draft = ""; // KV cache data type for V for the draft model
 
-    // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector
+    // multimodal models (see examples/mtmd)
+    model_paths mmproj;
+    bool mmproj_use_gpu = true;  // use GPU for multimodal model
+    bool no_mmproj      = false; // explicitly disable multimodal model
     std::vector<std::string> image; // path to image file(s)
 
     // embedding
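
A short sketch of how a caller might set the new multimodal fields; gpt_params and model_paths come from this header, while the repo and file names below are purely illustrative.

gpt_params params;
params.mmproj.hf_repo = "ggml-org/example-model"; // illustrative HF repo
params.mmproj.hf_file = "mmproj.gguf";            // illustrative projector file
params.mmproj_use_gpu = true;                     // keep the projector on GPU (default)
params.no_mmproj      = false;                    // leave multimodal enabled
params.image.push_back("input.png");              // image(s) to feed the projector
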