Skip to content

Commit 045e213

Browse files
committed
Refactor sweep-bench to use llama_get_memory(ctx) and the llama_memory_* API in place of the llama_kv_self_* calls.
Required by the changes in `14030`.
1 parent f70fef4 commit 045e213

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

examples/sweep-bench/sweep-bench.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ int main(int argc, char ** argv) {
9191

9292
//llama_context * ctx = llama_new_context_with_model(model, ctx_params);
9393
llama_context * ctx = llama_init_from_model(model, ctx_params);
94+
auto * mem = llama_get_memory(ctx);
9495

9596
if (ctx == NULL) {
9697
fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);
@@ -165,7 +166,9 @@ int main(int argc, char ** argv) {
165166
//if (params.batch_warmup) {
166167
if (true) {
167168
// clean up KV cache after generation
168-
llama_kv_self_clear(ctx);
169+
// llama_kv_self_clear(ctx);
170+
llama_memory_clear(mem, true);
171+
169172

170173
// prepare batch of pp size for prompt processing performance measurement
171174
common_batch_clear(batch);
@@ -182,11 +185,13 @@ int main(int argc, char ** argv) {
182185

183186
common_batch_clear(batch);
184187
//llama_batch_clear(batch);
185-
llama_kv_self_clear(ctx);
188+
//llama_kv_self_clear(ctx);
189+
llama_memory_clear(mem, true);
186190

187191
for (unsigned int n_kv = 0; n_kv < n_kv_max; n_kv += params.n_ubatch) {
188192
// clean up KV cache before generation
189-
llama_kv_self_seq_rm(ctx, 0, n_kv, -1);
193+
//llama_kv_self_seq_rm(ctx, 0, n_kv, -1);
194+
llama_memory_seq_rm(mem, 0, n_kv, -1);
190195

191196
// first measure token generation performance at this context size
192197
const auto t_tg_start = ggml_time_us();
@@ -206,7 +211,8 @@ int main(int argc, char ** argv) {
206211
const auto t_tg_end = ggml_time_us();
207212

208213
// clean up KV cache after generation
209-
llama_kv_self_seq_rm(ctx, 0, n_kv, -1);
214+
//llama_kv_self_seq_rm(ctx, 0, n_kv, -1);
215+
llama_memory_seq_rm(mem, 0, n_kv, -1);
210216

211217
// prepare batch of pp size for prompt processing performance measurement
212218
common_batch_clear(batch);

0 commit comments

Comments
 (0)