File tree (Expand file tree / Collapse file tree): 2 files changed, +17 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ llama_context::llama_context(
105105
106106 {
107107 const char * LLAMA_SET_ROWS = getenv (" LLAMA_SET_ROWS" );
108- const bool supports_set_rows = LLAMA_SET_ROWS ? (atoi (LLAMA_SET_ROWS) != 0 ) : false ;
108+ supports_set_rows = LLAMA_SET_ROWS ? (atoi (LLAMA_SET_ROWS) != 0 ) : false ;
109109
110110 if (!supports_set_rows && !cparams.kv_unified ) {
111111 LLAMA_LOG_WARN (" %s: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache\n " , __func__);
@@ -899,6 +899,12 @@ int llama_context::encode(const llama_batch & batch_inp) {
899899 }
900900 }
901901
902+ if (!supports_set_rows) {
903+ // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
904+ // overlap with device computation.
905+ ggml_backend_sched_reset (sched.get ());
906+ }
907+
902908 // TODO: hacky solution
903909 if (model.arch == LLM_ARCH_T5 && t_embd) {
904910 // cross.t_embd = t_embd;
@@ -1229,6 +1235,12 @@ int llama_context::decode(const llama_batch & batch_inp) {
12291235 // wait for the computation to finish (automatically done when obtaining the model output)
12301236 // synchronize();
12311237
1238+ if (!supports_set_rows) {
1239+ // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
1240+ // overlap with device computation.
1241+ ggml_backend_sched_reset (sched.get ());
1242+ }
1243+
12321244 return 0 ;
12331245}
12341246
Original file line number | Diff line number | Diff line change
@@ -287,6 +287,10 @@ struct llama_context {
287287
288288 bool has_evaluated_once = false ;
289289
290+ // env: LLAMA_SET_ROWS (temporary)
291+ // ref: https://github.com/ggml-org/llama.cpp/pull/14285
292+ bool supports_set_rows = false ;
293+
290294 // perf
291295 mutable int64_t t_start_us = 0 ;
292296 mutable int64_t t_load_us = 0 ;
You can’t perform that action at this time.
0 commit comments