@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
+        "llama-eval-callback",
+        "llama-export-lora",
+        "llama-gbnf-validator",
+        "llama-gen-docs",
+        "llama-gguf",
+        "llama-gguf-hash",
+        "llama-gguf-split",
+        "llama-gritlm",
+        "llama-imatrix",
+        "llama-infill",
+        "llama-llava-cli",
+        "llama-llava-clip-quantize-cli",
+        "llama-lookahead",
+        "llama-lookup",
+        "llama-lookup-create",
+        "llama-lookup-merge",
+        "llama-lookup-stats",
+        "llama-minicpmv-cli",
+        "llama-parallel",
+        "llama-passkey",
+        "llama-perplexity",
+        "llama-q8dot",
+        "llama-quantize",
+        "llama-quantize-stats",
+        "llama-qwen2vl-cli",
+        "llama-retrieval",
+        "llama-run",
+        "llama-save-load-state",
+        "llama-server",
+        "llama-simple",
+        "llama-simple-chat",
+        "llama-speculative",
+        "llama-speculative-simple",
+        "llama-tokenize",
+        "llama-tts",
+        "llama-vdot"
+    };
+
+    for (const auto & exe : executables) {
+        printf("complete -F _llama_completions %s\n", exe.c_str());
+    }
+}
+
 static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
     std::vector<ggml_backend_dev_t> devices;
     auto dev_names = string_split<std::string>(value, ',');
@@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
             }
             exit(0);
         }
+        if (ctx_arg.params.completion) {
+            common_params_print_completion(ctx_arg);
+            exit(0);
+        }
     } catch (const std::invalid_argument & ex) {
         fprintf(stderr, "%s\n", ex.what());
         ctx_arg.params = params_org;
@@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"--completion-bash"},
+        "print source-able bash completion script for llama.cpp",
+        [](common_params & params) {
+            params.completion = true;
+        }
+    ));
     add_opt(common_arg(
         {"--verbose-prompt"},
         string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
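Since the new flag only sets `params.completion` and the parser then prints the script and exits (see the hunk above), usage boils down to generating the script once and sourcing it; a sketch under that assumption (the file path is illustrative, and any of the listed llama-* binaries that share this argument parser should be able to emit it):

```bash
llama-cli --completion-bash > ~/.llama-completion.bash   # emit the script
source ~/.llama-completion.bash                          # e.g. from ~/.bashrc
# Now `llama-cli --mod<TAB>` completes flag names, and `--model <TAB>`
# offers only *.gguf files plus directories.
```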
@@ -946,6 +1059,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.sampling.min_p = std::stof(value);
         }
     ).set_sparam());
+    add_opt(common_arg(
+        {"--top-nsigma"}, "N",
+        string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma),
+        [](common_params & params, const std::string & value) {
+            params.sampling.top_n_sigma = std::stof(value);
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}).set_sparam());
     add_opt(common_arg(
         {"--xtc-probability"}, "N",
         string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double) params.sampling.xtc_probability),
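For context on what `--top-nsigma N` controls (this is the rule from the top-n-sigma sampling literature, not something visible in this diff): with logits $\ell_i$, their maximum $M$, and their standard deviation $\sigma$, a token survives the filter iff

$$\ell_i \;\ge\; M - N\sigma$$

so a larger $N$ keeps more of the distribution's tail, while the sentinel value -1.0 disables the filter, as the help string notes.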
@@ -1975,6 +2095,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.use_jinja = true;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
+    add_opt(common_arg(
+        {"--reasoning-format"}, "FORMAT",
+        "reasoning format (default: deepseek; allowed values: deepseek, none)\n"
+        "controls whether thought tags are extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only).\n"
+        "only supported for non-streamed responses",
+        [](common_params & params, const std::string & value) {
+            /* */ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
+            else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
+            else { throw std::invalid_argument("invalid value"); }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
     add_opt(common_arg(
         {"--chat-template"}, "JINJA_TEMPLATE",
         string_format(
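On the server side, the two `--reasoning-format` values can be told apart directly on the OpenAI-compatible chat endpoint; a hedged sketch (port, payload, and `jq` filter are illustrative):

```bash
# With --reasoning-format deepseek, thoughts are split into a separate field:
curl -s http://localhost:8080/v1/chat/completions \
    -d '{"messages":[{"role":"user","content":"hi"}]}' \
  | jq '.choices[0].message | {reasoning_content, content}'
# With --reasoning-format none, the same request leaves the thought tags
# inline in .content (and per the help text, only non-streamed responses
# are parsed at all).
```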