Skip to content

Commit 34634ae

Browse files
committed
tweak to smartcache for contextshifting
1 parent 8a18e09 commit 34634ae

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

gpttype_adapter.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3810,11 +3810,16 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
38103810
bool blank_prompt = (addedmemory=="" && kcpp_data->prompt=="");
38113811

38123812
//smart cache logic
3813-
if(kcpp_data->smartcache)
3813+
if(kcpp_data->smartcache && file_format==FileFormat::GGUF_GENERIC)
38143814
{
3815+
bool shiftable = true;
3816+
if(!kcpp_data->use_contextshift || is_recurrent)
3817+
{
3818+
shiftable = false;
3819+
}
38153820
const float similarity_threshold = 0.7f;
38163821
//If shifting is allowed and CanContextShift is true, do nothing. Allow shift as normal.
3817-
if(!CanContextShift(current_context_tokens, embd_inp, inputs.max_length, nctx))
3822+
if(!(shiftable && CanContextShift(current_context_tokens, embd_inp, inputs.max_length, nctx)))
38183823
{
38193824
// If the context cannot be shifted, calculate the prefix similarity against current_context_tokens (the current context)
38203825
// If similarity > similarity_threshold, do nothing. Allow fast forward as normal.
@@ -3829,7 +3834,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
38293834
for(int i=0;i<savestate_limit;++i)
38303835
{
38313836
float similaritybeat = ComputePrefixMatchPercent(savestates[i].savestate_context_tokens,embd_inp);
3832-
if(similaritybeat > similarity_threshold || CanContextShift(savestates[i].savestate_context_tokens, embd_inp, inputs.max_length, nctx))
3837+
if(similaritybeat > similarity_threshold || (shiftable && CanContextShift(savestates[i].savestate_context_tokens, embd_inp, inputs.max_length, nctx)))
38333838
{
38343839
//found a match. save to the oldest slot that's not the one we are loading
38353840
int oldest_slot = get_oldest_slot(i);

0 commit comments

Comments
 (0)