File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -532,11 +532,13 @@ struct llama_batch_manager : public llama_batch_manager_i {
532
532
533
533
// decide if we need to defrag the kv cache
534
534
if (cparams.causal_attn && cparams.defrag_thold >= 0.0f) {
535
- const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used)/float(kv_self.n) : 0.0f;
535
+ // - do not defrag small contexts (i.e. < 2048 tokens)
536
+ // - count the padding towards the number of used tokens
537
+ const float fragmentation = kv_self.n >= 2048 ? 1.0f - float(kv_self.used + lctx.get_ctx_padding(cparams))/float(kv_self.n) : 0.0f;
536
538
537
539
// queue defragmentation for next llama_kv_cache_update
538
540
if (fragmentation > cparams.defrag_thold) {
539
- // LLAMA_LOG_INFO(" fragmentation: %.2f\n", fragmentation);
541
+ LLAMA_LOG_DEBUG("%s: fragmentation: %.2f - requesting defrag\n", __func__, fragmentation);
540
542
541
543
kv_self.defrag();
542
544
}
You can’t perform that action at this time.
0 commit comments