-diff --git a/examples/common.cpp b/examples/common.cpp
+diff --git a/common/common.cpp b/common/common.cpp
 index abcdef1..1234567 100644
---- a/examples/common.cpp
-+++ b/examples/common.cpp
-@@ -123,6 +123,18 @@ void common_params_parser_init(const char * arg0, common_params * params, gpt_par
-         params->cache_type_v = llama_kv_cache_type_from_str(value.c_str());
-     })
- );
+--- a/common/common.cpp
++++ b/common/common.cpp
+@@ -123,6 +123,18 @@ void common_params_parser_init(common_params * params) {
+         params->cache_type_v = llama_model_kv_cache_type_from_str(value.c_str());
+     });
+ }
 +
-+    add_opt(common_arg(
-+        {"--kvq"}, "BITS",
-+        "Set both KV cache key and value quantization to same bits\nallowed values: 4, 8\n(default: 16 for FP16)",
-+        [](common_params & params, const std::string & value) {
++    {
++        const auto & argp = gpt_params_args.add_arg({
++            "--kvq", "-kvq"
++        }, "BITS", "Set both KV cache key and value quantization to same bits\nallowed values: 4, 8\n(default: 16 for FP16)");
++        argp.action = [&](const std::string & value) {
 +            try {
 +                int bits = std::stoi(value);
-+                params->cache_type_k = bits == 4 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q8_0;
-+                params->cache_type_v = bits == 4 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q8_0;
++                params->cache_type_k = bits == 4 ? LLAMA_KV_CACHE_TYPE_Q4_0 : LLAMA_KV_CACHE_TYPE_Q8_0;
++                params->cache_type_v = bits == 4 ? LLAMA_KV_CACHE_TYPE_Q4_0 : LLAMA_KV_CACHE_TYPE_Q8_0;
 +            } catch (const std::exception & e) {}
-+        })
-+    );
++        };
++    }
 
-    add_opt(common_arg(
-        {"--cont-batching"}, "",
+    // Add batching arguments
+    {
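For context, here is a minimal standalone sketch of the bits-to-cache-type mapping that the new `--kvq` handler performs, with the documented "allowed values: 4, 8" check made explicit. The `kv_cache_type` enum and the `kvq_from_bits` helper are hypothetical stand-ins introduced only for this illustration; the patch itself assigns `LLAMA_KV_CACHE_TYPE_Q4_0` / `LLAMA_KV_CACHE_TYPE_Q8_0` directly to `params->cache_type_k` and `params->cache_type_v`, and maps any value other than 4 to the 8-bit type.

```cpp
// Minimal sketch of the --kvq mapping, using a local stand-in enum
// (kv_cache_type is hypothetical; the patch uses the project's own
// LLAMA_KV_CACHE_TYPE_* constants instead).
#include <cstdio>
#include <exception>
#include <string>

enum class kv_cache_type { F16, Q4_0, Q8_0 };

// Map the "BITS" argument to a cache type: 4 -> Q4_0, 8 -> Q8_0.
// Unparseable or out-of-range values keep the FP16 default.
static kv_cache_type kvq_from_bits(const std::string & value) {
    try {
        const int bits = std::stoi(value);
        if (bits == 4) return kv_cache_type::Q4_0;
        if (bits == 8) return kv_cache_type::Q8_0;
    } catch (const std::exception &) {
        // invalid input: fall through to the default
    }
    return kv_cache_type::F16;
}

int main() {
    // both the K and V caches receive the same type, as in the patch
    const kv_cache_type k = kvq_from_bits("4");
    const kv_cache_type v = kvq_from_bits("4");
    std::printf("k=%d v=%d\n", static_cast<int>(k), static_cast<int>(v));
    return 0;
}
```

Under this sketch, passing `--kvq 4` would quantize both the key and value caches to 4 bits, `--kvq 8` to 8 bits, and anything else would leave the FP16 default in place.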