Skip to content

Commit 2214049

Browse files
committed
refactor: Make prompt_cache part of the Agent API.
1 parent 300f7ad commit 2214049

File tree

9 files changed

+78
-86
lines changed

9 files changed

+78
-86
lines changed

examples/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,3 @@ The [shared](./shared) directory contains reusable helper components used across
1212
| `chat_loop.h` | Interactive chat loop that reads user input from stdin and prints agent responses. Handles colored output for TTY terminals. |
1313
| `error_recovery_callback.h` | Callback that converts tool errors into JSON results, allowing the agent to see errors and retry gracefully instead of crashing. |
1414
| `logging_callback.h` | Callback that logs tool calls and their results to stderr. Useful for debugging and understanding agent behavior. |
15-
| `prompt_cache.h` | Utilities for building and caching the agent's system prompt tokens. Speeds up startup by reusing cached KV state. |

examples/context-engineering/context-engineering.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include "llama.h"
99
#include "logging_callback.h"
1010
#include "model.h"
11-
#include "prompt_cache.h"
1211
#include "tool.h"
1312
#include <algorithm>
1413
#include <cstdio>
@@ -210,7 +209,7 @@ main(int argc, char** argv)
210209
agent_cpp::Agent agent(
211210
std::move(model), std::move(tools), std::move(callbacks), instructions);
212211

213-
load_or_create_agent_cache(agent, "context-engineering.cache");
212+
agent.load_or_create_cache("context-engineering.cache");
214213

215214
printf("\nContext Engineering Demo ready!\n");
216215
printf(" Try to ask multiple calculations (i.e. 3+4, then 4 * 6) and");

examples/memory/memory.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#include "llama.h"
88
#include "logging_callback.h"
99
#include "model.h"
10-
#include "prompt_cache.h"
1110
#include "tool.h"
1211
#include <cstdio>
1312
#include <cstring>
@@ -326,7 +325,7 @@ main(int argc, char** argv)
326325

327326
agent_cpp::Agent agent(
328327
std::move(model), std::move(tools), std::move(callbacks), instructions);
329-
load_or_create_agent_cache(agent, "memory.cache");
328+
agent.load_or_create_cache("memory.cache");
330329

331330
printf("\nMemory Agent ready!\n");
332331
printf(" Try telling me your name, preferences, or ask to remember "

examples/multi-agent/multi-agent.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include "llama.h"
99
#include "logging_callback.h"
1010
#include "model.h"
11-
#include "prompt_cache.h"
1211
#include "tool.h"
1312

1413
#include <cstdio>
@@ -190,7 +189,7 @@ class MainAgent
190189
std::move(callbacks),
191190
get_instructions());
192191

193-
load_or_create_agent_cache(*agent_, cache_path);
192+
agent_->load_or_create_cache(cache_path);
194193
}
195194

196195
agent_cpp::Agent& get() { return *agent_; }

examples/shared/prompt_cache.h

Lines changed: 0 additions & 74 deletions
This file was deleted.

examples/shell/shell.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include "error_recovery_callback.h"
77
#include "llama.h"
88
#include "model.h"
9-
#include "prompt_cache.h"
109
#include "tool.h"
1110

1211
#include <algorithm>
@@ -223,8 +222,10 @@ main(int argc, char** argv)
223222

224223
printf("Loading model...\n");
225224
std::shared_ptr<agent_cpp::Model> model;
225+
auto model_config = agent_cpp::ModelConfig{};
226+
model_config.n_ctx = 16384;
226227
try {
227-
model = agent_cpp::Model::create(model_path);
228+
model = agent_cpp::Model::create(model_path, model_config);
228229
} catch (const agent_cpp::ModelError& e) {
229230
fprintf(stderr, "error: %s\n", e.what());
230231
return 1;
@@ -246,7 +247,7 @@ main(int argc, char** argv)
246247
agent_cpp::Agent agent(
247248
std::move(model), std::move(tools), std::move(callbacks), instructions);
248249

249-
load_or_create_agent_cache(agent, "shell.cache");
250+
agent.load_or_create_cache("shell.cache");
250251

251252
printf("Shell Agent ready!\n");
252253
printf(" This agent can execute shell commands and scripts.\n");

examples/tracing/tracing.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include "llama.h"
99
#include "logging_callback.h"
1010
#include "model.h"
11-
#include "prompt_cache.h"
1211
#include "tool.h"
1312

1413
#include <opentelemetry/exporters/otlp/otlp_http_exporter_factory.h>
@@ -284,7 +283,7 @@ main(int argc, char** argv)
284283
agent_cpp::Agent agent(
285284
std::move(model), std::move(tools), std::move(callbacks), instructions);
286285

287-
load_or_create_agent_cache(agent, "tracing.cache");
286+
agent.load_or_create_cache("tracing.cache");
288287

289288
printf("\nTracing Agent ready! Try asking me to do some calculations.\n");
290289
printf(" Type an empty line to quit.\n\n");

src/agent.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "error.h"
33
#include <algorithm>
44
#include <cstdio>
5+
#include <filesystem>
56

67
namespace agent_cpp {
78

@@ -144,4 +145,65 @@ Agent::run_loop(std::vector<common_chat_msg>& messages,
144145
}
145146
}
146147

148+
// Builds the token sequence for the agent's static prompt: an optional
// "system" message carrying `instructions`, plus the tool definitions returned
// by get_tool_definitions(), rendered through the model's chat templates and
// then tokenized by the model.
//
// Returns an empty vector when no model is attached.
std::vector<llama_token>
149+
Agent::build_prompt_tokens()
150+
{
151+
// Without a model there are no templates or tokenizer to use.
if (!model) {
152+
return {};
153+
}
154+
155+
// The message list stays empty when no instructions were configured.
std::vector<common_chat_msg> system_messages;
156+
if (!instructions.empty()) {
157+
common_chat_msg system_msg;
158+
system_msg.role = "system";
159+
system_msg.content = instructions;
160+
system_messages.push_back(system_msg);
161+
}
162+
163+
std::vector<common_chat_tool> tool_definitions = get_tool_definitions();
164+
165+
// Template inputs: system message(s) and tools only; no generation prompt
// is appended and thinking is disabled.
common_chat_templates_inputs inputs;
166+
inputs.messages = system_messages;
167+
inputs.tools = tool_definitions;
168+
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
169+
inputs.add_generation_prompt = false;
170+
inputs.enable_thinking = false;
171+
172+
auto params = common_chat_templates_apply(model->get_templates(), inputs);
173+
174+
// Only the rendered prompt text is tokenized; other fields of `params`
// are not used here.
return model->tokenize(params.prompt);
175+
}
176+
177+
// Loads the prompt cache stored at `cache_path`, or creates it when it cannot
// be loaded.
//
// Flow:
//   1. No model attached                         -> returns false.
//   2. File exists and load_cache() yields tokens -> prints a message, returns true.
//   3. Otherwise the prompt tokens are rebuilt; if there are none, nothing is
//      written and the function returns true.
//   4. Otherwise the tokens are run through the model and the result of
//      save_cache() is returned.
//
// Progress messages are written to stdout via printf.
bool
178+
Agent::load_or_create_cache(const std::string& cache_path)
179+
{
180+
if (!model) {
181+
return false;
182+
}
183+
184+
// NOTE(review): the single-argument std::filesystem::exists overload can
// throw std::filesystem::filesystem_error on an underlying OS error, which
// would bypass the true/false return contract — confirm this is acceptable.
if (std::filesystem::exists(cache_path)) {
185+
auto cached_tokens = model->load_cache(cache_path);
186+
// An empty result is treated as "could not load" and falls through to
// the creation path below.
// NOTE(review): the loaded tokens are not compared against the current
// prompt in this function; presumably a cache written for different
// instructions/tools would be accepted unless load_cache validates it —
// TODO confirm.
if (!cached_tokens.empty()) {
187+
printf("Loaded prompt cache from '%s' (%zu tokens)\n",
188+
cache_path.c_str(),
189+
cached_tokens.size());
190+
return true;
191+
}
192+
}
193+
194+
auto prompt_tokens = build_prompt_tokens();
195+
// Nothing to cache: reported as success without touching the file.
if (prompt_tokens.empty()) {
196+
return true;
197+
}
198+
199+
printf("Creating prompt cache at '%s' (%zu tokens)\n",
200+
cache_path.c_str(),
201+
prompt_tokens.size());
202+
203+
// warms the KV cache
204+
// The return value of generate_from_tokens is intentionally not used here.
model->generate_from_tokens(prompt_tokens);
205+
206+
return model->save_cache(cache_path);
207+
}
208+
147209
} // namespace agent_cpp

src/agent.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ class Agent
4848

4949
// Get the model (for cache operations)
5050
[[nodiscard]] Model* get_model() const { return model.get(); }
51+
52+
// Load the prompt cache from cache_path; if the file is missing or yields no tokens, build the prompt, warm the KV cache, and save it to cache_path
53+
// Returns true if the cache was loaded, saved, or there was no prompt to cache; false if no model is set or saving fails
54+
bool load_or_create_cache(const std::string& cache_path);
55+
56+
private:
57+
// Build the agent's prompt tokens (system message + tool definitions)
58+
std::vector<llama_token> build_prompt_tokens();
5159
};
5260

5361
} // namespace agent_cpp

0 commit comments

Comments
 (0)