@@ -427,6 +427,10 @@ void print_tok_vec(std::vector<float> &embd)
427427 const float SCTruncationRatio = 0.5 ; // ratio for how many tokens to fast forward
428428 const int SCTokThreshold = 32 + (nctx*0.05 ); // how many tokens of similarity triggers smartcontext
429429
430+ // printf("\nORIGINAL CTX:\n");
431+ // print_tok_vec(current_context_tokens);
432+ // printf("\nORIGINAL EMBD:\n");
433+ // print_tok_vec(embd_inp);
430434
431435 // fast forward the past based on identical tokens, stop once a divergence is noted
432436 int embd_inp_len = embd_inp.size ();
@@ -474,6 +478,10 @@ void print_tok_vec(std::vector<float> &embd)
474478 last_n_tokens.erase (last_n_tokens.begin (), last_n_tokens.begin () + n_past);
475479 embd_inp.erase (embd_inp.begin (), embd_inp.begin () + n_past);
476480 embd_inp_len = embd_inp.size ();
481+
482+ // printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8,
483+ // embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5);
484+ // printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
477485 }
478486
479487 // smart context mode, detect if we have a shifted context at max length
@@ -482,7 +490,15 @@ void print_tok_vec(std::vector<float> &embd)
482490
483491 if (fastforwardok && useSmartContext && smartcontext.size () > 0 && embd_inp_len >= SCInpLenThreshold)
484492 {
493+ // printf("curfullcontext:\n");
494+ // print_tok_vec(current_context_tokens);
495+
485496 // see if smartcontext is still usable
497+ // printf("smartctx:\n");
498+ // print_tok_vec(smartcontext);
499+ // printf("embinp:\n");
500+ // print_tok_vec(embd_inp);
501+
486502 auto shared = LongestCommonSubseq (smartcontext, embd_inp);
487503 if (shared.size () > SCTokThreshold && ArrStartWith (smartcontext, shared)) // at least 32 tokens in common
488504 {
@@ -492,6 +508,8 @@ void print_tok_vec(std::vector<float> &embd)
492508 auto trimmed = std::vector<int >(embd_inp.begin () + found, embd_inp.end ());
493509 embd_inp = trimmed;
494510 embd_inp_len = embd_inp.size ();
511+ // printf("trimmed:\n");
512+ // print_tok_vec(embd_inp,&vocab.id_to_token);
495513 printf (" \n [Reusing Smart Context: %d allowance remaining]" , found);
496514
497515 int old_n_past = n_past;
@@ -503,6 +521,7 @@ void print_tok_vec(std::vector<float> &embd)
503521
504522 for (int i = n_past; i < current_context_tokens.size (); ++i)
505523 {
524+ // printf("\n%s and %s\n",vocab.id_to_token[current_context_tokens[i]].c_str(), vocab.id_to_token[embd_inp[i-offset_fix]].c_str());
506525 if (current_context_tokens[i] == embd_inp[i-offset_fix])
507526 {
508527 n_past += 1 ;
@@ -520,7 +539,8 @@ void print_tok_vec(std::vector<float> &embd)
520539
521540 last_n_tokens.erase (last_n_tokens.begin (), last_n_tokens.begin () + (n_past-old_n_past));
522541 embd_inp.erase (embd_inp.begin (), embd_inp.begin () + (n_past-old_n_past));
523-
542+ // printf("np:%d newembinp: \n",n_past);
543+ // print_tok_vec(embd_inp);
524544 }else {
525545 smartcontext.clear ();
526546 }
@@ -544,7 +564,8 @@ void print_tok_vec(std::vector<float> &embd)
544564 int shiftamt = embd_inp.size () * SCTruncationRatio;
545565 smartcontext = std::vector<int >(embd_inp.begin () + shiftamt, embd_inp.end ());
546566 printf (" \n [New Smart Context Triggered! Buffered Token Allowance: %d]" ,shiftamt);
547-
567+ // printf("smartctx:\n");
568+ // print_tok_vec(smartcontext,&vocab.id_to_token);
548569 embd_inp = smartcontext;
549570 // if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
550571 // when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS
0 commit comments