
Commit 6458c71

Merge branch 'master' into llamacli-tools
2 parents 1dd2e3b + 8a8c4ce commit 6458c71

37 files changed: +2219 / -492 lines

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG MUSA_VERSION=rc3.1.0
+ARG MUSA_VERSION=rc3.1.1
 # Target the MUSA build image
 ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
@@ -443,7 +443,7 @@ jobs:
 
   ubuntu-22-cmake-musa:
     runs-on: ubuntu-22.04
-    container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
+    container: mthreads/musa:rc3.1.1-devel-ubuntu22.04
 
     steps:
       - name: Clone

README.md

Lines changed: 14 additions & 0 deletions
@@ -521,3 +521,17 @@ If your issue is with model generation quality, then please at least scan the fo
 
 #### References
 
+
+### Completions
+Command-line completion is available for some environments.
+
+#### Bash Completion
+```bash
+$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
+$ source ~/.llama-completion.bash
+```
+Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
+automatically. For example:
+```console
+$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
+```
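Once the script is sourced, completion works in the usual bash way. An illustrative session (not part of the commit; the exact suggestions depend on which options your build registers):

```bash
$ source ~/.llama-completion.bash
$ llama-cli --<TAB><TAB>       # lists every registered flag
$ llama-cli --model <TAB>      # offers directories and *.gguf files only
```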

common/arg.cpp

Lines changed: 131 additions & 0 deletions
@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
+        "llama-eval-callback",
+        "llama-export-lora",
+        "llama-gbnf-validator",
+        "llama-gen-docs",
+        "llama-gguf",
+        "llama-gguf-hash",
+        "llama-gguf-split",
+        "llama-gritlm",
+        "llama-imatrix",
+        "llama-infill",
+        "llama-llava-cli",
+        "llama-llava-clip-quantize-cli",
+        "llama-lookahead",
+        "llama-lookup",
+        "llama-lookup-create",
+        "llama-lookup-merge",
+        "llama-lookup-stats",
+        "llama-minicpmv-cli",
+        "llama-parallel",
+        "llama-passkey",
+        "llama-perplexity",
+        "llama-q8dot",
+        "llama-quantize",
+        "llama-quantize-stats",
+        "llama-qwen2vl-cli",
+        "llama-retrieval",
+        "llama-run",
+        "llama-save-load-state",
+        "llama-server",
+        "llama-simple",
+        "llama-simple-chat",
+        "llama-speculative",
+        "llama-speculative-simple",
+        "llama-tokenize",
+        "llama-tts",
+        "llama-vdot"
+    };
+
+    for (const auto& exe : executables) {
+        printf("complete -F _llama_completions %s\n", exe.c_str());
+    }
+}
+
 static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
     std::vector<ggml_backend_dev_t> devices;
     auto dev_names = string_split<std::string>(value, ',');
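For readability, the script emitted by `common_params_print_completion` looks roughly like the following (a sketch assembled from the printf calls above; in the real output the `opts` string lists every registered flag and one `complete` line is printed per executable in the set):

```bash
_llama_completions() {
    local cur prev opts
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"

    # space-separated list of every registered flag (truncated here)
    opts="--model --grammar-file --top-nsigma ..."

    case "$prev" in
        --model)
            # after --model, offer directories and *.gguf files
            COMPREPLY=( $(compgen -f -X '!*.gguf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        --grammar-file)
            # after --grammar-file, offer directories and *.gbnf files
            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- "$cur") $(compgen -d -- "$cur") )
            return 0
            ;;
        *)
            # otherwise complete flag names from the opts list
            COMPREPLY=( $(compgen -W "${opts}" -- "$cur") )
            return 0
            ;;
    esac
}

complete -F _llama_completions llama-cli
complete -F _llama_completions llama-server
# ... one `complete` line per executable in the std::set above
```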
@@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
             }
             exit(0);
         }
+        if (ctx_arg.params.completion) {
+            common_params_print_completion(ctx_arg);
+            exit(0);
+        }
     } catch (const std::invalid_argument & ex) {
         fprintf(stderr, "%s\n", ex.what());
         ctx_arg.params = params_org;
@@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"--completion-bash"},
+        "print source-able bash completion script for llama.cpp",
+        [](common_params & params) {
+            params.completion = true;
+        }
+    ));
     add_opt(common_arg(
         {"--verbose-prompt"},
         string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
@@ -946,6 +1059,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.sampling.min_p = std::stof(value);
         }
     ).set_sparam());
+    add_opt(common_arg(
+        {"--top-nsigma"}, "N",
+        string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma),
+        [](common_params & params, const std::string & value) {
+            params.sampling.top_n_sigma = std::stof(value);
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}).set_sparam());
     add_opt(common_arg(
         {"--xtc-probability"}, "N",
         string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
@@ -1975,6 +2095,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.use_jinja = true;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
+    add_opt(common_arg(
+        {"--reasoning-format"}, "FORMAT",
+        "reasoning format (default: deepseek; allowed values: deepseek, none)\n"
+        "controls whether thought tags are extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only).\n"
+        "only supported for non-streamed responses",
+        [](common_params & params, const std::string & value) {
+            /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
+            else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
+            else { std::invalid_argument("invalid value"); }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
     add_opt(common_arg(
         {"--chat-template"}, "JINJA_TEMPLATE",
         string_format(
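To illustrate what the new `--reasoning-format` flag controls (a hedged sketch, not part of this commit; the model file name is a placeholder): with `deepseek`, a non-streamed chat completion from `llama-server` returns the model's thought block in `message.reasoning_content` and the final answer in `message.content`, while `none` leaves the thought tags inline in `message.content`.

```bash
# start the server with a reasoning-capable model (placeholder file name)
llama-server -m DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf --reasoning-format deepseek

# non-streamed request; compare choices[0].message.reasoning_content
# with choices[0].message.content in the JSON response
curl -s http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "What is 7 * 6?"}], "stream": false}'
```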
