@@ -1106,7 +1106,7 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
     printf("        \"\n\n");

     printf("    case \"$prev\" in\n");
-    printf("        --model)\n");
+    printf("        --model|-m)\n");
     printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
     printf("            return 0\n");
     printf("            ;;\n");
@@ -2555,15 +2555,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & value) {
-            params.lora_adapters.push_back({ std::string(value), 1.0, nullptr });
+            params.lora_adapters.push_back({ std::string(value), 1.0, "", "", nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
     add_opt(common_arg(
         {"--lora-scaled"}, "FNAME", "SCALE",
         "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)",
         [](common_params & params, const std::string & fname, const std::string & scale) {
-            params.lora_adapters.push_back({ fname, std::stof(scale), nullptr });
+            params.lora_adapters.push_back({ fname, std::stof(scale), "", "", nullptr });
         }
         // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg
     ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}));
@@ -3538,6 +3538,22 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));

+    add_opt(common_arg(
+        {"--fim-qwen-30b-default"},
+        string_format("use default Qwen 3 Coder 30B A3B Instruct (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF";
+            params.model.hf_file = "qwen3-coder-30b-a3b-instruct-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
     add_opt(common_arg(
         { "--diffusion-steps" }, "N",
         string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
0 commit comments