 
 #include "log.h"
 #include "sampling.h"
+#include "chat.h"
 
 #include <algorithm>
 #include <climits>
@@ -2501,5 +2502,53 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
 
+    add_opt(common_arg(
+        {"--fim-qwen-1.5b-default"},
+        string_format("use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF";
+            params.hf_file = "qwen2.5-coder-1.5b-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--fim-qwen-3b-default"},
+        string_format("use default Qwen 2.5 Coder 3B (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF";
+            params.hf_file = "qwen2.5-coder-3b-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
+    add_opt(common_arg(
+        {"--fim-qwen-7b-default"},
+        string_format("use default Qwen 2.5 Coder 7B (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF";
+            params.hf_file = "qwen2.5-coder-7b-q8_0.gguf";
+            params.port = 8012;
+            params.n_gpu_layers = 99;
+            params.flash_attn = true;
+            params.n_ubatch = 1024;
+            params.n_batch = 1024;
+            params.n_ctx = 0;
+            params.n_cache_reuse = 256;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+
     return ctx_arg;
 }
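
The three presets above differ only in the Hugging Face repo and file they point at; everything else (port 8012, full GPU offload, flash attention, batch sizes, fresh context size, cache reuse) is shared. Below is a small standalone C++ sketch of the same pattern — a flag-to-handler table where each flag is a lambda that overwrites a group of defaults on a shared params struct. It does not use llama.cpp's common_arg/add_opt API; preset_params and its default values are illustrative stand-ins for common_params, and only the 1.5B preset is spelled out.

// Standalone sketch of the preset pattern: flag -> lambda mutating shared params.
// NOTE: not the llama.cpp common_arg API; struct name and defaults are illustrative.
#include <cstdint>
#include <cstdio>
#include <functional>
#include <map>
#include <string>

struct preset_params {              // simplified stand-in for common_params
    std::string hf_repo;
    std::string hf_file;
    int32_t     port          = 8080;
    int32_t     n_gpu_layers  = 0;
    bool        flash_attn    = false;
    int32_t     n_ubatch      = 512;
    int32_t     n_batch       = 2048;
    int32_t     n_ctx         = 4096;
    int32_t     n_cache_reuse = 0;
};

int main(int argc, char ** argv) {
    // flag -> handler, mirroring the FIM presets added in the diff
    std::map<std::string, std::function<void(preset_params &)>> presets = {
        {"--fim-qwen-1.5b-default", [](preset_params & p) {
            p.hf_repo       = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF";
            p.hf_file       = "qwen2.5-coder-1.5b-q8_0.gguf";
            p.port          = 8012;
            p.n_gpu_layers  = 99;
            p.flash_attn    = true;
            p.n_ubatch      = 1024;
            p.n_batch       = 1024;
            p.n_ctx         = 0;
            p.n_cache_reuse = 256;
        }},
        // the 3B and 7B presets differ only in hf_repo / hf_file
    };

    preset_params params;
    for (int i = 1; i < argc; ++i) {
        auto it = presets.find(argv[i]);
        if (it != presets.end()) {
            it->second(params);     // apply the matched preset to the shared params
        }
    }
    std::printf("repo=%s port=%d\n", params.hf_repo.c_str(), params.port);
    return 0;
}

With the actual change, a preset would be selected at server launch with one of the new flags, e.g. llama-server --fim-qwen-7b-default (assuming the standard llama.cpp server binary name).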