Skip to content

Commit ea10f03

Browse files
committed
feat: Expose cache_type_k and cache_type_v.
1 parent e2ffcc9 commit ea10f03

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

src/model.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ Model::initialize_context(const ModelConfig& model_config)
112112
llama_context_params ctx_params = llama_context_default_params();
113113
ctx_params.n_ctx = model_config.n_ctx;
114114
ctx_params.n_batch = model_config.n_batch;
115+
ctx_params.n_threads = model_config.n_threads;
116+
ctx_params.n_threads_batch = model_config.n_threads_batch;
117+
ctx_params.type_k = model_config.cache_type_k;
118+
ctx_params.type_v = model_config.cache_type_v;
115119

116120
ctx_ = llama_init_from_model(weights_->get_model(), ctx_params);
117121
if (ctx_ == nullptr) {

src/model.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
#include "chat.h"
44
#include "llama.h"
5+
#include <algorithm>
56
#include <functional>
67
#include <memory>
78
#include <optional>
89
#include <string>
10+
#include <thread>
911

1012
namespace agent_cpp {
1113

@@ -20,11 +22,17 @@ struct ModelConfig
2022
int top_k = 0;
2123
float temp = 0.0F;
2224
uint32_t seed = LLAMA_DEFAULT_SEED;
23-
// Chat format for parsing tool calls. When nullopt (default), the format
24-
// is auto-detected from the model's chat template.
25+
// When nullopt (default), the format is auto-detected from the model's chat
26+
// template.
2527
std::optional<common_chat_format> chat_format = std::nullopt;
2628
int n_ctx = 10240;
2729
int n_batch = -1;
30+
// Thread count forwarded to llama_context_params::n_threads.
// Defaults to all-but-one hardware thread, never fewer than 1.
// hardware_concurrency() may return 0 when the count cannot be determined,
// so clamp BEFORE subtracting: `hardware_concurrency() - 1` would wrap
// around in unsigned arithmetic and an outer max() could not catch it.
int n_threads =
    static_cast<int>(std::max(2u, std::thread::hardware_concurrency()) - 1);
32+
// Thread count forwarded to llama_context_params::n_threads_batch.
// Defaults to all-but-one hardware thread, never fewer than 1.
// hardware_concurrency() may return 0 when the count cannot be determined,
// so clamp BEFORE subtracting: `hardware_concurrency() - 1` would wrap
// around in unsigned arithmetic and an outer max() could not catch it.
int n_threads_batch =
    static_cast<int>(std::max(2u, std::thread::hardware_concurrency()) - 1);
34+
// ggml element type copied into llama_context_params::type_k when the
// context is initialized. Defaults to GGML_TYPE_F16.
ggml_type cache_type_k = GGML_TYPE_F16;
35+
// ggml element type copied into llama_context_params::type_v when the
// context is initialized. Defaults to GGML_TYPE_F16.
ggml_type cache_type_v = GGML_TYPE_F16;
2836
};
2937

3038
// Forward declaration

0 commit comments

Comments
 (0)