2 files changed: +14 −2 lines changed
@@ -112,6 +112,10 @@ Model::initialize_context(const ModelConfig& model_config)
   llama_context_params ctx_params = llama_context_default_params();
   ctx_params.n_ctx = model_config.n_ctx;
   ctx_params.n_batch = model_config.n_batch;
+  ctx_params.n_threads = model_config.n_threads;
+  ctx_params.n_threads_batch = model_config.n_threads_batch;
+  ctx_params.type_k = model_config.cache_type_k;
+  ctx_params.type_v = model_config.cache_type_v;
 
   ctx_ = llama_init_from_model(weights_->get_model(), ctx_params);
   if (ctx_ == nullptr) {
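For readers less familiar with these llama.cpp knobs: `n_threads` controls decode-time threading, `n_threads_batch` controls prompt-processing threading, and `type_k`/`type_v` choose the element type of the KV cache. A minimal stand-alone sketch against a recent llama.cpp C API follows; the model path is a placeholder, and entry-point names have shifted across llama.cpp versions:

```cpp
#include "llama.h"

int main() {
  llama_model_params mparams = llama_model_default_params();
  // "model.gguf" is a placeholder path, not part of this diff.
  llama_model* model = llama_model_load_from_file("model.gguf", mparams);
  if (model == nullptr) { return 1; }

  llama_context_params cparams = llama_context_default_params();
  cparams.n_ctx = 10240;           // matches ModelConfig's default below
  cparams.n_threads = 7;           // decode threads
  cparams.n_threads_batch = 7;     // prompt-processing threads
  cparams.type_k = GGML_TYPE_F16;  // K-cache element type
  cparams.type_v = GGML_TYPE_F16;  // V-cache element type

  llama_context* ctx = llama_init_from_model(model, cparams);
  if (ctx == nullptr) { return 1; }

  llama_free(ctx);
  llama_model_free(model);
  return 0;
}
```

Dropping `type_k`/`type_v` to a quantized type such as `GGML_TYPE_Q8_0` roughly halves KV-cache memory relative to F16, though llama.cpp builds have generally required flash attention to be enabled before a quantized V cache is accepted.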
@@ -2,10 +2,12 @@
 
 #include "chat.h"
 #include "llama.h"
+#include <algorithm>
 #include <functional>
 #include <memory>
 #include <optional>
 #include <string>
+#include <thread>
 
 namespace agent_cpp {
 
@@ -20,11 +22,17 @@ struct ModelConfig
   int top_k = 0;
   float temp = 0.0F;
   uint32_t seed = LLAMA_DEFAULT_SEED;
-  // Chat format for parsing tool calls. When nullopt (default), the format
-  // is auto-detected from the model's chat template.
+  // When nullopt (default), the format is auto-detected from the model's chat
+  // template.
   std::optional<common_chat_format> chat_format = std::nullopt;
   int n_ctx = 10240;
   int n_batch = -1;
+  int n_threads =
+      static_cast<int>(std::max(1u, std::thread::hardware_concurrency() - 1));
+  int n_threads_batch =
+      static_cast<int>(std::max(1u, std::thread::hardware_concurrency() - 1));
+  ggml_type cache_type_k = GGML_TYPE_F16;
+  ggml_type cache_type_v = GGML_TYPE_F16;
 };
 
 // Forward declaration
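One caveat on the new defaults: the C++ standard allows `std::thread::hardware_concurrency()` to return 0 when the value is not computable, and because the subtraction happens in unsigned arithmetic, `0u - 1` wraps to `UINT_MAX` before `std::max` can clamp it. A wrap-safe variant is sketched below; it is not part of this diff, and `default_thread_count` is a hypothetical helper name:

```cpp
#include <algorithm>
#include <thread>

// Clamp to at least 2 before subtracting, so a 0 return from
// hardware_concurrency() can never wrap around; the result is
// always >= 1 and leaves one core free on multi-core machines.
inline int default_thread_count() {
  unsigned hw = std::max(2u, std::thread::hardware_concurrency());
  return static_cast<int>(hw - 1);
}
```

With such a helper, both `n_threads` and `n_threads_batch` could default to `default_thread_count()`.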