@@ -106,7 +106,7 @@ static kcpp_params * kcpp_data = nullptr;
 static int max_context_limit_at_load = 0;
 static int n_past = 0;
 static int debugmode = 0; // -1 = hide all, 0 = normal, 1 = showall
-static bool quiet = false;
+static bool is_quiet = false;
 static std::vector<gpt_vocab::id> last_n_tokens;
 static std::vector<gpt_vocab::id> current_context_tokens;
 static size_t mem_per_token = 0;
@@ -939,12 +939,12 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float

     if (last_idx>1) // if there are 2 or more viable candidates
     {
-        if (debugmode==1 && !quiet) {
+        if (debugmode==1 && !is_quiet) {
             printf("XTC penalties [");
         }
         // then remove all other tokens above threshold EXCEPT the least likely one
         for (size_t i = 0; i < last_idx - 1; ++i) {
-            if (debugmode==1 && !quiet)
+            if (debugmode==1 && !is_quiet)
             {
                 gpt_vocab::id token = candidates->data[i].id;
                 std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
@@ -953,7 +953,7 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float
             }
             candidates->data[i].logit -= 999.0f; // infinity gets wonky results downstream, this hack works well enough
         }
-        if (debugmode==1 && !quiet) {
+        if (debugmode==1 && !is_quiet) {
             printf("]\n");
         }
         candidates->sorted = false;
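
For reference, the XTC ("exclude top choices") step this hunk instruments keeps only the least likely of the candidates that clear the probability threshold and knocks out the rest. A minimal sketch with a simplified stand-in type (not the project's real llama_token_data_array), assuming candidates are sorted most- to least-probable:

```cpp
#include <vector>

struct TokenData { int id; float logit; float p; };

// Assumes `candidates` is sorted by probability, descending.
void xtc_penalize(std::vector<TokenData> &candidates, float threshold) {
    // count how many leading candidates clear the threshold
    size_t last_idx = 0;
    for (size_t i = 0; i < candidates.size(); ++i) {
        if (candidates[i].p >= threshold) { last_idx = i + 1; }
    }
    if (last_idx > 1) { // two or more viable candidates
        // penalize every above-threshold token EXCEPT the least likely one;
        // -999.0f mirrors the diff's hack, since -infinity misbehaves downstream
        for (size_t i = 0; i < last_idx - 1; ++i) {
            candidates[i].logit -= 999.0f;
        }
    }
}
```
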
@@ -1142,7 +1142,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
         max_exponent = FLOAT_MAX_LOG / std::log(penalty_base);
     }

-    if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
+    if (debugmode==1 && !is_quiet && !dry_max_token_repeat.empty()) {
         printf("DRY penalties [");
     }
     size_t count = 0;
@@ -1153,7 +1153,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
             repeat_exp = max_exponent;
         }
         float penalty = penalty_multiplier * pow(penalty_base, repeat_exp);
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
             ::utreplace(tokenizedstr, "\n", "\\n");
@@ -1166,7 +1166,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
     {
         candidates->sorted = false;
     }
-    if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
+    if (debugmode==1 && !is_quiet && !dry_max_token_repeat.empty()) {
         printf("]\n");
     }
 }
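
The DRY penalty math visible in these hunks grows exponentially with repeat length, with the exponent clamped so pow(penalty_base, repeat_exp) cannot overflow a float. A hedged standalone sketch, assuming the standard DRY formulation (the function name, parameters, and the exact FLOAT_MAX_LOG value are illustrative, not the file's real ones):

```cpp
#include <cmath>

constexpr float FLOAT_MAX_LOG_APPROX = 88.7228391f; // ~log(FLT_MAX)

float dry_penalty(int repeat_len, int allowed_len,
                  float penalty_base, float penalty_multiplier) {
    float repeat_exp = static_cast<float>(repeat_len - allowed_len);
    if (penalty_base > 1.0f) {
        // clamp so pow() below stays within float range
        float max_exponent = FLOAT_MAX_LOG_APPROX / std::log(penalty_base);
        if (repeat_exp > max_exponent) repeat_exp = max_exponent;
    }
    return penalty_multiplier * std::pow(penalty_base, repeat_exp);
}
```
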
@@ -1697,7 +1697,7 @@ static void load_grammar(const std::string & gammarstr)
         printf("\nIgnored invalid grammar sampler.");
         return;
     }
-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         parsed_grammar.print(stderr);
     }
@@ -1840,7 +1840,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
     float chi_ctx_value = (n_ctx_desired * ctx_multiplier) / 6.28318;
     float gradient_ai_rope_freq_base_value = powf(original_rope_base, log10f(chi_ctx_value) / log10f(chi_ctx_train_value));

-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         printf("Trained max context length (value:%.d).\n", n_ctx_train);
         printf("Desired context length (value:%.d).\n", n_ctx_desired);
@@ -1857,7 +1857,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
     {
         float extended_rope_positive_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / ((log10f(chi_ctx_value) * log10f(chi_ctx_train_value)) - (log10f(chi_ctx_value) + log10f(chi_ctx_train_value))));
         float rope_freq_base_with_positive_offset = gradient_ai_rope_freq_base_value * extended_rope_positive_offset_value;
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             printf("Extended RoPE Positive Offset (multiplicator) for Solar based models. (value:%.3f).\n", extended_rope_positive_offset_value);
             printf("RoPE base calculated via Gradient AI formula for Solar based models. (value:%.1f).\n", rope_freq_base_with_positive_offset);
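
Pulling the formulas visible in these two hunks together, the Gradient AI RoPE base scales the original base by the log-ratio of a "chi" context value (context length scaled and divided by 2π ≈ 6.28318), with an extra multiplicative offset for Solar-based models. A reconstruction as a standalone function; the ctx_multiplier parameter and the is_solar branch condition are assumptions, everything else is copied from the hunk:

```cpp
#include <cmath>

float gradient_ai_rope_freq_base(float original_rope_base,
                                 int n_ctx_train, int n_ctx_desired,
                                 float ctx_multiplier, bool is_solar) {
    float chi_ctx_train_value = (n_ctx_train * ctx_multiplier) / 6.28318f;
    float chi_ctx_value = (n_ctx_desired * ctx_multiplier) / 6.28318f;
    float base = powf(original_rope_base,
                      log10f(chi_ctx_value) / log10f(chi_ctx_train_value));
    if (is_solar) {
        // extra positive offset applied for Solar-based models, per the diff
        float offset = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) /
                            ((log10f(chi_ctx_value) * log10f(chi_ctx_train_value)) -
                             (log10f(chi_ctx_value) + log10f(chi_ctx_train_value))));
        return base * offset;
    }
    return base;
}
```
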
@@ -1873,6 +1873,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai

 ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in_file_format, FileFormatExtraMeta in_file_format_meta)
 {
+    is_quiet = inputs.quiet;
     ggml_time_init();
     kcpp_data = new kcpp_params(); // allocate on heap to avoid linux segfault. yes this leaks memory.

@@ -2688,13 +2689,13 @@ std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos)
         printf("\nWarning: KCPP text generation not initialized!\n");
         return toks;
     }
-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         printf("\nFileFormat: %d, Tokenizing: %s", file_format, input.c_str());
     }
     TokenizeString(input, toks, file_format, addbos);
     int tokcount = toks.size();
-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         printf("\nTokens Counted: %d\n", tokcount);
     }
@@ -2779,7 +2780,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         llama_perf_context_reset(llama_ctx_v4);
     }

-    quiet = inputs.quiet;
     generation_finished = false; // Set current generation status
     generated_tokens.clear(); // New Generation, new tokens
     delayed_generated_tokens.clear();
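
Taken together with the gpttype_load_model hunk above, this is the substantive change in the commit: the flag is now captured once at model load rather than re-read from each generation request. A minimal sketch of the net effect, using hypothetical stand-in types rather than the project's real input structs:

```cpp
// Hypothetical stand-ins for load_model_inputs / generation flow.
struct LoadInputs { bool quiet; };

static bool is_quiet = false;

void load_model(const LoadInputs &inputs) {
    is_quiet = inputs.quiet; // set once, at load time
}

void generate() {
    // generation no longer re-reads the flag from its own inputs;
    // all debug printing is gated by the load-time value
    if (!is_quiet) { /* debug output */ }
}
```
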
@@ -2858,7 +2858,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     banned_token_ids.clear();
     if (banned_tokens.size()>0)
     {
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             printf("\nBanning %zu single character sequences...", banned_tokens.size());
         }
@@ -2875,13 +2875,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 }
             }
         }
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             printf("\nBanned a total of %zu individual tokens.\n", banned_token_ids.size());
         }
     }

-    if (debugmode==1 && !quiet && banned_phrases.size()>0)
+    if (debugmode==1 && !is_quiet && banned_phrases.size()>0)
     {
         printf("\nBanned a total of %zu phrases, with max token count of %d.\n", banned_phrases.size(), delayed_generated_tokens_limit);
     }
@@ -2926,7 +2926,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         // images have changed. swap identifiers to force reprocessing
         current_llava_identifier = (current_llava_identifier==LLAVA_TOKEN_IDENTIFIER_A?LLAVA_TOKEN_IDENTIFIER_B:LLAVA_TOKEN_IDENTIFIER_A);
         llava_composite_image_signature = new_llava_composite;
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             printf("\nLLAVA images changed, existing cache invalidated");
         }
@@ -2982,7 +2982,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         const int MAX_CHAR_LEN = 40;
         const int MAX_SEQ_LEN = 20;

-        if (debugmode == 1 && !quiet)
+        if (debugmode == 1 && !is_quiet)
         {
             printf("\nProcessing %zu dry break strings...", kcpp_data->dry_sequence_breakers.size());
         }
@@ -2994,7 +2994,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             }
             GetOverlappingTokenSequences(sequence_break, dry_sequence_breakers, MAX_SEQ_LEN);
         }
-        if (debugmode == 1 && !quiet)
+        if (debugmode == 1 && !is_quiet)
        {
             int trivial = 0, non_trivial = 0;
             for (const auto &seq : dry_sequence_breakers)
@@ -3014,7 +3014,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     }

     bool stream_sse = inputs.stream_sse;
-    bool allow_regular_prints = (!quiet && debugmode!=-1);
+    bool allow_regular_prints = (!is_quiet && debugmode!=-1);

     std::string grammarstr = inputs.grammar;
     bool grammar_retain_state = inputs.grammar_retain_state;
@@ -3047,7 +3047,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if (kcpp_data->seed <= 0 || kcpp_data->seed==0xFFFFFFFF)
     {
         kcpp_data->seed = (((uint32_t)time(NULL)) % 1000000u);
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             printf("\nUsing Seed: %d", kcpp_data->seed);
         }
@@ -3079,15 +3079,15 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
         else
         {
-            if (debugmode==1 && !quiet)
+            if (debugmode==1 && !is_quiet)
             {
                 printf("\nCreating clip image embed...");
             }
             llava_images[i].clp_image_tokens = 0;
             if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_data->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
                 printf("\nError: Clip image %d failed to create embd!", i);
             }
-            if (debugmode==1 && !quiet)
+            if (debugmode==1 && !is_quiet)
             {
                 printf("\nLLAVA Clip Embed %i used Tokens: %d", i, llava_images[i].clp_image_tokens);
             }
@@ -3210,7 +3210,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
     n_past = 0;

-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         std::string outstr = "";
         printf("\n\n[Debug: Dump Raw Input Tokens, format: %d]\n", file_format);
@@ -3355,7 +3355,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         printf("\n");
     }

-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         std::string outstr = "";
         printf("\n[Debug: Dump Forwarded Input Tokens, format: %d]\n", file_format);
@@ -3404,7 +3404,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 draft_used = true;
                 draft_results = speculative_decoding_eval_chunk(draft_ctx, llama_ctx_v4, embd, n_vocab, n_past);
                 evalres = draft_results.draft_success;
-                if (debugmode==1 && !quiet)
+                if (debugmode==1 && !is_quiet)
                 {
                     std::string draftedtoks = get_tok_vec_str(draft_results.draftids);
                     printf("\nDrafted %d Tokens: [%s]\n", speculative_chunk_amt, draftedtoks.c_str());
@@ -3607,7 +3607,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             if (draft_used)
             {
                 int32_t draftedid = draft_results.draftids[logits_sampled];
-                if (debugmode==1 && !quiet)
+                if (debugmode==1 && !is_quiet)
                 {
                     std::string drafttok = FileFormatTokenizeID(draftedid, file_format, true);
                     std::string realtok = FileFormatTokenizeID(id, file_format, true);
@@ -3660,7 +3660,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             {
                 printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict);
             }
-            if (debugmode==1 && !quiet && top_picks_history.size()>0)
+            if (debugmode==1 && !is_quiet && top_picks_history.size()>0)
             {
                 printf(" [");
                 bool firstloop = true;
@@ -3912,7 +3912,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         delayed_generated_tokens.pop_front();
     }

-    if (debugmode==1 && !quiet && file_format == FileFormat::GGUF_GENERIC)
+    if (debugmode==1 && !is_quiet && file_format == FileFormat::GGUF_GENERIC)
     {
         printf("\n");
         llama_perf_context_print(llama_ctx_v4);