1 file changed: +2 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -8801,10 +8801,10 @@ static int llama_decode_impl(
88018801 // llama_synchronize(&lctx);
88028802
88038803 // decide if we need to defrag the kv cache
8804- if (cparams.causal_attn && cparams.defrag_thold >= 0.0f) {
8804+ if (cparams.causal_attn && cparams.defrag_thold > 0.0f) {
88058805 // - do not defrag small contexts (i.e. < 2048 tokens)
88068806 // - count the padding towards the number of used tokens
8807- const float fragmentation = kv_self.n >= 2048 ? 1.0f - float(kv_self.used + llama_kv_cache_get_padding(cparams))/float(kv_self.n) : 0.0f;
8807+ const float fragmentation = kv_self.n >= 2048 ? std::max(0.0f, 1.0f - float(kv_self.used + llama_kv_cache_get_padding(cparams))/float(kv_self.n)) : 0.0f;
88088808
88098809 // queue defragmentation for next llama_kv_cache_update
88108810 if (fragmentation > cparams.defrag_thold ) {
You can’t perform that action at this time.
0 commit comments