1 file changed: +4 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -8802,11 +8802,13 @@ static int llama_decode_impl(
88028802
88038803 // decide if we need to defrag the kv cache
88048804 if (cparams.causal_attn && cparams.defrag_thold >= 0 .0f ) {
8805- const float fragmentation = kv_self.n >= 128 ? 1 .0f - float (kv_self.used )/float (kv_self.n ) : 0 .0f ;
8805+ // - do not defrag small contexts (i.e. < 2048 tokens)
8806+ // - do not defrag if the padding is bigger than the defrag threshold
8807+ const float fragmentation = (kv_self.n >= 2048 && kv_self.n *cparams.defrag_thold >= llama_kv_cache_get_padding (cparams)) ? 1 .0f - float (kv_self.used )/float (kv_self.n ) : 0 .0f ;
88068808
88078809 // queue defragmentation for next llama_kv_cache_update
88088810 if (fragmentation > cparams.defrag_thold ) {
8809- // LLAMA_LOG_INFO(" fragmentation: %.2f\n", fragmentation);
8811+ LLAMA_LOG_DEBUG ( " %s: fragmentation: %.2f - requesting defrag \n " , __func__ , fragmentation);
88108812
88118813 llama_kv_cache_defrag (kv_self);
88128814 }
You can’t perform that action at this time.
0 commit comments