|
2 | 2 |
|
3 | 3 | #include "log.h" |
4 | 4 | #include "sampling.h" |
| 5 | +#include "chat.h" |
5 | 6 |
|
6 | 7 | #include <algorithm> |
7 | 8 | #include <climits> |
@@ -2264,7 +2265,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex |
2264 | 2265 | ).set_env("LLAMA_LOG_VERBOSITY")); |
2265 | 2266 | add_opt(common_arg( |
2266 | 2267 | {"--log-prefix"}, |
2267 | | - "Enable prefx in log messages", |
| 2268 | + "Enable prefix in log messages", |
2268 | 2269 | [](common_params &) { |
2269 | 2270 | common_log_set_prefix(common_log_main(), true); |
2270 | 2271 | } |
@@ -2518,5 +2519,53 @@ common_params_context common_params_parser_init(common_params & params, llama_ex |
2518 | 2519 | } |
2519 | 2520 | ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); |
2520 | 2521 |
|
| 2522 | + add_opt(common_arg( |
| 2523 | + {"--fim-qwen-1.5b-default"}, |
| 2524 | + string_format("use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)"), |
| 2525 | + [](common_params & params) { |
| 2526 | + params.hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF"; |
| 2527 | + params.hf_file = "qwen2.5-coder-1.5b-q8_0.gguf"; |
| 2528 | + params.port = 8012; |
| 2529 | + params.n_gpu_layers = 99; |
| 2530 | + params.flash_attn = true; |
| 2531 | + params.n_ubatch = 1024; |
| 2532 | + params.n_batch = 1024; |
| 2533 | + params.n_ctx = 0; |
| 2534 | + params.n_cache_reuse = 256; |
| 2535 | + } |
| 2536 | + ).set_examples({LLAMA_EXAMPLE_SERVER})); |
| 2537 | + |
| 2538 | + add_opt(common_arg( |
| 2539 | + {"--fim-qwen-3b-default"}, |
| 2540 | + string_format("use default Qwen 2.5 Coder 3B (note: can download weights from the internet)"), |
| 2541 | + [](common_params & params) { |
| 2542 | + params.hf_repo = "ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF"; |
| 2543 | + params.hf_file = "qwen2.5-coder-3b-q8_0.gguf"; |
| 2544 | + params.port = 8012; |
| 2545 | + params.n_gpu_layers = 99; |
| 2546 | + params.flash_attn = true; |
| 2547 | + params.n_ubatch = 1024; |
| 2548 | + params.n_batch = 1024; |
| 2549 | + params.n_ctx = 0; |
| 2550 | + params.n_cache_reuse = 256; |
| 2551 | + } |
| 2552 | + ).set_examples({LLAMA_EXAMPLE_SERVER})); |
| 2553 | + |
| 2554 | + add_opt(common_arg( |
| 2555 | + {"--fim-qwen-7b-default"}, |
| 2556 | + string_format("use default Qwen 2.5 Coder 7B (note: can download weights from the internet)"), |
| 2557 | + [](common_params & params) { |
| 2558 | + params.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"; |
| 2559 | + params.hf_file = "qwen2.5-coder-7b-q8_0.gguf"; |
| 2560 | + params.port = 8012; |
| 2561 | + params.n_gpu_layers = 99; |
| 2562 | + params.flash_attn = true; |
| 2563 | + params.n_ubatch = 1024; |
| 2564 | + params.n_batch = 1024; |
| 2565 | + params.n_ctx = 0; |
| 2566 | + params.n_cache_reuse = 256; |
| 2567 | + } |
| 2568 | + ).set_examples({LLAMA_EXAMPLE_SERVER})); |
| 2569 | + |
2521 | 2570 | return ctx_arg; |
2522 | 2571 | } |
0 commit comments