1414
1515#include < chrono>
1616
17+ // static int debugmode;
18+
// Time point sampled from the high-resolution clock when this static is initialized.
static auto bench_timer = std::chrono::high_resolution_clock::now();
1820
1921void timer_start ()
@@ -427,10 +429,10 @@ void print_tok_vec(std::vector<float> &embd)
427429 const float SCTruncationRatio = 0.5 ; // ratio for how many tokens to fast forward
428430 const int SCTokThreshold = 32 + (nctx*0.05 ); // how many tokens of similarity triggers smartcontext
429431
430- // printf("\nORIGINAL CTX:\n");
431- // print_tok_vec(current_context_tokens);
432- // printf("\nORIGINAL EMBD:\n");
433- // print_tok_vec(embd_inp);
432+ // printf("\nORIGINAL CTX:\n");
433+ // print_tok_vec(current_context_tokens);
434+ // printf("\nORIGINAL EMBD:\n");
435+ // print_tok_vec(embd_inp);
434436
435437 // fast forward the past based on identical tokens, stop once a divergence is noted
436438 int embd_inp_len = embd_inp.size ();
@@ -479,9 +481,9 @@ void print_tok_vec(std::vector<float> &embd)
479481 embd_inp.erase (embd_inp.begin (), embd_inp.begin () + n_past);
480482 embd_inp_len = embd_inp.size ();
481483
482- // printf("\nconds: %d %d %d\n",current_context_tokens.size() >= nctx*0.8
483- // embd_inp_len >= nctx*0.6 ,current_context_tokens.size() - n_past > nctx*0.5) ;
484- // printf("csiz:%d par:%d eilen:%d np:%d",current_context_tokens.size(), (int)(nctx*0.8),embd_inp_len,n_past);
484+ printf (" \n conds: %d %d %d\n " ,current_context_tokens.size () >= nctx*0.8 );
485+ embd_inp_len >= nctx*0.6 ,current_context_tokens.size () - n_past > nctx*0.5 ;
486+ printf (" csiz:%d par:%d eilen:%d np:%d" ,current_context_tokens.size (), (int )(nctx*0.8 ),embd_inp_len,n_past);
485487 }
486488
487489 // smart context mode, detect if we have a shifted context at max length
@@ -490,14 +492,14 @@ void print_tok_vec(std::vector<float> &embd)
490492
491493 if (fastforwardok && useSmartContext && smartcontext.size () > 0 && embd_inp_len >= SCInpLenThreshold)
492494 {
493- // printf("curfullcontext:\n");
494- // print_tok_vec(current_context_tokens);
495+ // printf("curfullcontext:\n");
496+ // print_tok_vec(current_context_tokens);
495497
496498 // see if smartcontext is still usable
497- // printf("smartctx:\n");
498- // print_tok_vec(smartcontext);
499- // printf("embinp:\n");
500- // print_tok_vec(embd_inp);
499+ // printf("smartctx:\n");
500+ // if(debugmode==1) { print_tok_vec(smartcontext);}
501+ // printf("embinp:\n");
502+ // if(debugmode==1) { print_tok_vec(embd_inp);}
501503
502504 auto shared = LongestCommonSubseq (smartcontext, embd_inp);
503505 if (shared.size () > SCTokThreshold && ArrStartWith (smartcontext, shared)) // at least 32 tokens in common
@@ -508,8 +510,8 @@ void print_tok_vec(std::vector<float> &embd)
508510 auto trimmed = std::vector<int >(embd_inp.begin () + found, embd_inp.end ());
509511 embd_inp = trimmed;
510512 embd_inp_len = embd_inp.size ();
511- // printf("trimmed:\n");
512- // print_tok_vec(embd_inp,&vocab.id_to_token);
513+ // printf("trimmed:\n");
514+ // if(debugmode==1) { print_tok_vec(embd_inp);}
513515 printf (" \n [Reusing Smart Context: %d allowance remaining]" , found);
514516
515517 int old_n_past = n_past;
@@ -521,7 +523,7 @@ void print_tok_vec(std::vector<float> &embd)
521523
522524 for (int i = n_past; i < current_context_tokens.size (); ++i)
523525 {
524- // printf("\n%s and %s\n",vocab.id_to_token[ current_context_tokens[i]].c_str(), vocab.id_to_token[ embd_inp[i-offset_fix]].c_str() );
526+ printf (" \n %s and %s\n " ,current_context_tokens[i], embd_inp[i-offset_fix]);
525527 if (current_context_tokens[i] == embd_inp[i-offset_fix])
526528 {
527529 n_past += 1 ;
@@ -539,8 +541,8 @@ void print_tok_vec(std::vector<float> &embd)
539541
540542 last_n_tokens.erase (last_n_tokens.begin (), last_n_tokens.begin () + (n_past-old_n_past));
541543 embd_inp.erase (embd_inp.begin (), embd_inp.begin () + (n_past-old_n_past));
542- // printf("np:%d newembinp: \n",n_past);
543- // print_tok_vec(embd_inp);
544+ // printf("np:%d newembinp: \n",n_past);
545+ // if (debugmode==1) { print_tok_vec(embd_inp);}
544546 }else {
545547 smartcontext.clear ();
546548 }
@@ -563,9 +565,9 @@ void print_tok_vec(std::vector<float> &embd)
563565 // determine longest common substring after removing start part
564566 int shiftamt = embd_inp.size () * SCTruncationRatio;
565567 smartcontext = std::vector<int >(embd_inp.begin () + shiftamt, embd_inp.end ());
566- printf (" \n [New Smart Context Triggered! Buffered Token Allowance: %d]" ,shiftamt);
567- // printf("smartctx:\n");
568- // print_tok_vec(smartcontext,&vocab.id_to_token);
568+ printf (" \n [New Smart Context Triggered! Buffered Token Allowance: %d]" ,shiftamt);
569+ // printf("smartctx:\n");
570+ // if(debugmode==1) { print_tok_vec(smartcontext);}
569571 embd_inp = smartcontext;
570572 // if max ctx length is exceeded, chop the prompt in half after the start part, and memorize it. The memorized part becomes LCS marker.
571573 // when a future prompt comes in, find the LCS again. If LCS > a length and LCS starts with memorized LCS
0 commit comments