
Commit 0e45d3b

quiet flags now set at load time
1 parent bec2314 commit 0e45d3b

7 files changed: +100 -94 lines changed

expose.h

Lines changed: 5 additions & 5 deletions
@@ -54,7 +54,6 @@ struct load_model_inputs
     const int cublas_info = 0;
     const char * vulkan_info = nullptr;
     const int blasbatchsize = 512;
-    const int debugmode = 0;
     const int forceversion = 0;
     const int gpulayers = 0;
     const float rope_freq_scale = 1.0f;
@@ -64,6 +63,8 @@ struct load_model_inputs
     const float tensor_split[tensor_split_max] = {};
     const int quant_k = 0;
     const int quant_v = 0;
+    const bool quiet = false;
+    const int debugmode = 0;
 };
 struct generation_inputs
 {
@@ -97,7 +98,6 @@ struct generation_inputs
     const bool stream_sse = false;
     const char * grammar = nullptr;
     const bool grammar_retain_state = false;
-    const bool quiet = false;
     const float dynatemp_range = 0.0f;
     const float dynatemp_exponent = 1.0f;
     const float smoothing_factor = 0.0f;
@@ -157,6 +157,7 @@ struct sd_load_model_inputs
     const char * vae_filename = nullptr;
     const char * lora_filename = nullptr;
     const float lora_multiplier = 1.0f;
+    const bool quiet = false;
     const int debugmode = 0;
 };
 struct sd_generation_inputs
@@ -172,7 +173,6 @@ struct sd_generation_inputs
     const int seed = 0;
     const char * sample_method = nullptr;
     const int clip_skip = -1;
-    const bool quiet = false;
 };
 struct sd_generation_outputs
 {
@@ -187,6 +187,7 @@ struct whisper_load_model_inputs
     const int clblast_info = 0;
     const int cublas_info = 0;
     const char * vulkan_info = nullptr;
+    const bool quiet = false;
     const int debugmode = 0;
 };
 struct whisper_generation_inputs
@@ -195,7 +196,6 @@ struct whisper_generation_inputs
     const char * audio_data = nullptr;
     const bool suppress_non_speech = false;
     const char * langcode = nullptr;
-    const bool quiet = false;
 };
 struct whisper_generation_outputs
 {
@@ -214,14 +214,14 @@ struct tts_load_model_inputs
     const char * vulkan_info = nullptr;
     const int gpulayers = 0;
     const bool flash_attention = false;
+    const bool quiet = false;
     const int debugmode = 0;
 };
 struct tts_generation_inputs
 {
     const char * prompt = nullptr;
     const int speaker_seed = 0;
     const int audio_seed = 0;
-    const bool quiet = false;
     const bool nocache = false;
 };
 struct tts_generation_outputs
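
Taken together, these hunks move `quiet` out of every per-request input struct and into the matching `*_load_model_inputs` struct, next to `debugmode`. Below is a minimal sketch of the resulting calling pattern, using trimmed stand-ins for the structs (the real ones carry many more fields, and this `main` is hypothetical):

// Sketch only: trimmed stand-ins for the structs in expose.h, to show the
// new flow. Verbosity is now fixed once at load time, not per request.
#include <cstdio>

struct load_model_inputs        // quiet moved here
{
    const bool quiet = false;
    const int debugmode = 0;
};

struct generation_inputs        // quiet removed from per-request inputs
{
    const bool stream_sse = false;
};

static bool is_quiet = false;   // file-scope copy, as in gpttype_adapter.cpp

void gpttype_load_model(const load_model_inputs inputs)
{
    is_quiet = inputs.quiet;    // captured once for the whole session
}

void gpttype_generate(const generation_inputs /*inputs*/)
{
    if (!is_quiet) { printf("routine progress output...\n"); }
}

int main()
{
    gpttype_load_model(load_model_inputs{ .quiet = true }); // C++20 designated init
    gpttype_generate(generation_inputs{});                  // now silent
}

One consequence of this design: a server can no longer toggle verbosity per request, which is the point of the commit title.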

gpttype_adapter.cpp

Lines changed: 29 additions & 29 deletions
@@ -106,7 +106,7 @@ static kcpp_params * kcpp_data = nullptr;
 static int max_context_limit_at_load = 0;
 static int n_past = 0;
 static int debugmode = 0; //-1 = hide all, 0 = normal, 1 = showall
-static bool quiet = false;
+static bool is_quiet = false;
 static std::vector<gpt_vocab::id> last_n_tokens;
 static std::vector<gpt_vocab::id> current_context_tokens;
 static size_t mem_per_token = 0;
@@ -939,12 +939,12 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float

     if(last_idx>1) //if there are 2 or more viable candidates
     {
-        if (debugmode==1 && !quiet) {
+        if (debugmode==1 && !is_quiet) {
             printf("XTC penalties [");
         }
         // then remove all other tokens above threshold EXCEPT the least likely one
         for (size_t i = 0; i < last_idx - 1; ++i) {
-            if (debugmode==1 && !quiet)
+            if (debugmode==1 && !is_quiet)
             {
                 gpt_vocab::id token = candidates->data[i].id;
                 std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
@@ -953,7 +953,7 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float
             }
             candidates->data[i].logit -= 999.0f; //infinity gets wonky results downstream, this hack works well enough
         }
-        if (debugmode==1 && !quiet) {
+        if (debugmode==1 && !is_quiet) {
             printf("]\n");
         }
         candidates->sorted = false;
@@ -1142,7 +1142,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
         max_exponent = FLOAT_MAX_LOG / std::log(penalty_base);
     }

-    if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
+    if (debugmode==1 && !is_quiet && !dry_max_token_repeat.empty()) {
         printf("DRY penalties [");
     }
     size_t count = 0;
@@ -1153,7 +1153,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
             repeat_exp = max_exponent;
         }
         float penalty = penalty_multiplier * pow(penalty_base, repeat_exp);
-        if (debugmode==1 && !quiet)
+        if (debugmode==1 && !is_quiet)
         {
             std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
             ::utreplace(tokenizedstr, "\n", "\\n");
@@ -1166,7 +1166,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
     {
         candidates->sorted = false;
     }
-    if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
+    if (debugmode==1 && !is_quiet && !dry_max_token_repeat.empty()) {
         printf("]\n");
     }
 }
@@ -1697,7 +1697,7 @@ static void load_grammar(const std::string & gammarstr)
         printf("\nIgnored invalid grammar sampler.");
         return;
     }
-    if(debugmode==1 && !quiet)
+    if(debugmode==1 && !is_quiet)
     {
         parsed_grammar.print(stderr);
     }
@@ -1840,7 +1840,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
     float chi_ctx_value = (n_ctx_desired * ctx_multiplier) / 6.28318;
     float gradient_ai_rope_freq_base_value = powf(original_rope_base, log10f(chi_ctx_value) / log10f(chi_ctx_train_value));

-    if(debugmode==1 && !quiet)
+    if(debugmode==1 && !is_quiet)
     {
         printf("Trained max context length (value:%.d).\n", n_ctx_train);
         printf("Desired context length (value:%.d).\n", n_ctx_desired);
@@ -1857,7 +1857,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
     {
         float extended_rope_positive_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / ((log10f(chi_ctx_value) * log10f(chi_ctx_train_value)) - (log10f(chi_ctx_value) + log10f(chi_ctx_train_value))));
         float rope_freq_base_with_positive_offset = gradient_ai_rope_freq_base_value * extended_rope_positive_offset_value;
-        if(debugmode==1 && !quiet)
+        if(debugmode==1 && !is_quiet)
         {
             printf("Extended RoPE Positive Offset (multiplicator) for Solar based models. (value:%.3f).\n", extended_rope_positive_offset_value);
             printf("RoPE base calculated via Gradient AI formula for Solar based models. (value:%.1f).\n", rope_freq_base_with_positive_offset);
@@ -1873,6 +1873,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai

 ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in_file_format, FileFormatExtraMeta in_file_format_meta)
 {
+    is_quiet = inputs.quiet;
     ggml_time_init();
     kcpp_data = new kcpp_params(); //allocate on heap to avoid linux segfault. yes this leaks memory.

@@ -2688,13 +2689,13 @@ std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos)
         printf("\nWarning: KCPP text generation not initialized!\n");
         return toks;
     }
-    if(debugmode==1 && !quiet)
+    if(debugmode==1 && !is_quiet)
     {
         printf("\nFileFormat: %d, Tokenizing: %s",file_format ,input.c_str());
     }
     TokenizeString(input, toks, file_format,addbos);
     int tokcount = toks.size();
-    if(debugmode==1 && !quiet)
+    if(debugmode==1 && !is_quiet)
     {
         printf("\nTokens Counted: %d\n",tokcount);
     }
@@ -2779,7 +2780,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         llama_perf_context_reset(llama_ctx_v4);
     }

-    quiet = inputs.quiet;
     generation_finished = false; // Set current generation status
     generated_tokens.clear(); // New Generation, new tokens
     delayed_generated_tokens.clear();
@@ -2858,7 +2858,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     banned_token_ids.clear();
     if(banned_tokens.size()>0)
     {
-        if(debugmode==1 && !quiet)
+        if(debugmode==1 && !is_quiet)
         {
             printf("\nBanning %zu single character sequences...",banned_tokens.size());
         }
@@ -2875,13 +2875,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             }
         }
     }
-    if(debugmode==1 && !quiet)
+    if(debugmode==1 && !is_quiet)
     {
         printf("\nBanned a total of %zu individual tokens.\n",banned_token_ids.size());
     }
 }

-if(debugmode==1 && !quiet && banned_phrases.size()>0)
+if(debugmode==1 && !is_quiet && banned_phrases.size()>0)
 {
     printf("\nBanned a total of %zu phrases, with max token count of %d.\n",banned_phrases.size(),delayed_generated_tokens_limit);
 }
@@ -2926,7 +2926,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     //images have changed. swap identifiers to force reprocessing
     current_llava_identifier = (current_llava_identifier==LLAVA_TOKEN_IDENTIFIER_A?LLAVA_TOKEN_IDENTIFIER_B:LLAVA_TOKEN_IDENTIFIER_A);
     llava_composite_image_signature = new_llava_composite;
-    if(debugmode==1 && !quiet)
+    if(debugmode==1 && !is_quiet)
     {
         printf("\nLLAVA images changed, existing cache invalidated");
     }
@@ -2982,7 +2982,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     const int MAX_CHAR_LEN = 40;
     const int MAX_SEQ_LEN = 20;

-    if (debugmode == 1 && !quiet)
+    if (debugmode == 1 && !is_quiet)
     {
         printf("\nProcessing %zu dry break strings...", kcpp_data->dry_sequence_breakers.size());
     }
@@ -2994,7 +2994,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
         GetOverlappingTokenSequences(sequence_break, dry_sequence_breakers, MAX_SEQ_LEN);
     }
-    if (debugmode == 1 && !quiet)
+    if (debugmode == 1 && !is_quiet)
     {
         int trivial = 0, non_trivial = 0;
         for (const auto &seq : dry_sequence_breakers)
@@ -3014,7 +3014,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     }

     bool stream_sse = inputs.stream_sse;
-    bool allow_regular_prints = (!quiet && debugmode!=-1);
+    bool allow_regular_prints = (!is_quiet && debugmode!=-1);

     std::string grammarstr = inputs.grammar;
     bool grammar_retain_state = inputs.grammar_retain_state;
@@ -3047,7 +3047,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if (kcpp_data->seed <= 0 || kcpp_data->seed==0xFFFFFFFF)
     {
         kcpp_data->seed = (((uint32_t)time(NULL)) % 1000000u);
-        if(debugmode==1 && !quiet)
+        if(debugmode==1 && !is_quiet)
         {
             printf("\nUsing Seed: %d",kcpp_data->seed);
         }
@@ -3079,15 +3079,15 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
         else
         {
-            if(debugmode==1 && !quiet)
+            if(debugmode==1 && !is_quiet)
             {
                 printf("\nCreating clip image embed...");
             }
             llava_images[i].clp_image_tokens = 0;
             if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_data->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
                 printf("\nError: Clip image %d failed to create embd!",i);
             }
-            if(debugmode==1 && !quiet)
+            if(debugmode==1 && !is_quiet)
             {
                 printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
             }
@@ -3210,7 +3210,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
     n_past = 0;

-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         std::string outstr = "";
         printf("\n\n[Debug: Dump Raw Input Tokens, format: %d]\n", file_format);
@@ -3355,7 +3355,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         printf("\n");
     }

-    if (debugmode==1 && !quiet)
+    if (debugmode==1 && !is_quiet)
     {
         std::string outstr = "";
         printf("\n[Debug: Dump Forwarded Input Tokens, format: %d]\n", file_format);
@@ -3404,7 +3404,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         draft_used = true;
         draft_results = speculative_decoding_eval_chunk(draft_ctx, llama_ctx_v4, embd, n_vocab, n_past);
         evalres = draft_results.draft_success;
-        if(debugmode==1 && !quiet)
+        if(debugmode==1 && !is_quiet)
         {
             std::string draftedtoks = get_tok_vec_str(draft_results.draftids);
             printf("\nDrafted %d Tokens: [%s]\n",speculative_chunk_amt,draftedtoks.c_str());
@@ -3607,7 +3607,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if(draft_used)
     {
         int32_t draftedid = draft_results.draftids[logits_sampled];
-        if(debugmode==1 && !quiet)
+        if(debugmode==1 && !is_quiet)
         {
             std::string drafttok = FileFormatTokenizeID(draftedid, file_format, true);
             std::string realtok = FileFormatTokenizeID(id, file_format, true);
@@ -3660,7 +3660,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     {
         printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict);
     }
-    if(debugmode==1 && !quiet && top_picks_history.size()>0)
+    if(debugmode==1 && !is_quiet && top_picks_history.size()>0)
     {
         printf(" [");
         bool firstloop = true;
@@ -3912,7 +3912,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         delayed_generated_tokens.pop_front();
     }

-    if(debugmode==1 && !quiet && file_format == FileFormat::GGUF_GENERIC)
+    if(debugmode==1 && !is_quiet && file_format == FileFormat::GGUF_GENERIC)
     {
         printf("\n");
         llama_perf_context_print(llama_ctx_v4);
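
The hunks in this file are mechanical: the file-scope flag is renamed from `quiet` to `is_quiet` (avoiding confusion with the removed per-request field), it is assigned once in `gpttype_load_model` instead of at the top of `gpttype_generate`, and every diagnostic keeps the same two-level gate. A small sketch of that gate follows; `debug_prints_enabled` is a hypothetical helper added here for illustration, since the real code repeats the condition inline:

#include <cstdio>

static int debugmode = 0;       // -1 = hide all, 0 = normal, 1 = showall
static bool is_quiet = false;   // set once at model load

// Hypothetical helper; gpttype_adapter.cpp writes this condition inline.
static bool debug_prints_enabled()
{
    return debugmode == 1 && !is_quiet;
}

int main()
{
    if (debug_prints_enabled())
    {
        printf("XTC penalties [...]\n");       // verbose diagnostics, debugmode 1 only
    }
    bool allow_regular_prints = (!is_quiet && debugmode != -1);
    if (allow_regular_prints)
    {
        printf("\rGenerating (1 / 8 tokens)"); // normal progress output
    }
}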

klite.embd

Lines changed: 18 additions & 4 deletions
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->

 <script>
-const LITEVER = 205;
+const LITEVER = 206;
 const urlParams = new URLSearchParams(window.location.search);
 var localflag = true;
 const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -5009,8 +5009,17 @@ initializeInstructUIFunctionality();
 function copyMarkdownCode(btn)
 {
     const codeContainer = btn.parentElement.querySelector('pre code');
-    //selectElementContents(codeContainer);
-    navigator.clipboard.writeText(codeContainer.innerText);
+    let innercode = codeContainer.innerText;
+    //remove common language descriptors from the start
+    let langsmatched = ["matlab","jsonc","powershell","ps1","haskell","hs","vbnet","vb","apache","apacheconf","makefile","mk","ini","protobuf","proto","typescript","tsx","markdown","md","mkdown","mkd","python","py","javascript","js","jsx","html","xhtml","xml","css","json","typescript","ts","tsx","bash","sh","zsh","java","csharp","cs","c","h","cpp","hpp","php","sql","ruby","rb","go","golang","kotlin","kt","swift","rust","rs","r","dart","scala","dockerfile","docker","yaml","yml","ini","toml","perl","pl","shell","console","powershell","ps1","lua","typescript","ts"];
+    for(let i = 0; i < langsmatched.length; ++i) {
+        let matcher = langsmatched[i]+"\n";
+        if (innercode.startsWith(matcher)) {
+            innercode = innercode.substring(matcher.length);
+            break;
+        }
+    }
+    navigator.clipboard.writeText(innercode);
 }

 function simpleMarkdown(text) {
@@ -13469,7 +13478,12 @@ initializeInstructUIFunctionality();
 if (document.getElementById("jailbreakprompt2") && document.getElementById("jailbreakprompt2").checked && document.getElementById("jailbreakprompttext2").value!="") {
     let addrole = document.getElementById("jailbreakprompttext2role").value;
     addrole = ((addrole==2)?"system":(addrole==1?"assistant":"user"));
-    oai_payload.messages.push({ "role": addrole, "content": document.getElementById("jailbreakprompttext2").value });
+    let postmsg = { "role": addrole, "content": document.getElementById("jailbreakprompttext2").value };
+    if(addrole=="assistant" && targetep.toLowerCase().includes("api.deepseek.com"))
+    {
+        postmsg["prefix"] = true;
+    }
+    oai_payload.messages.push(postmsg);
 }

 oaiemulatecompletionscontent = "";
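
The Lite changes are unrelated quality-of-life fixes bundled into the same commit: the copy button now strips a leading language identifier (a stray `python` line, say) that the markdown renderer can leave at the top of a copied code block, and an injected assistant-role message gains `"prefix": true` when the endpoint is api.deepseek.com, which DeepSeek's API uses to mark an assistant message for prefix continuation. Below is a transcription of the stripping idea into C++, kept in the same language as the sketches above; the tag list is abbreviated from the real one:

#include <iostream>
#include <string>
#include <vector>

// Same idea as copyMarkdownCode: if the copied text begins with a bare
// language tag on its own line, drop that line. List abbreviated here.
std::string strip_language_tag(std::string code)
{
    static const std::vector<std::string> langs = {
        "python", "py", "javascript", "js", "cpp", "c", "bash", "sh"
    };
    for (const auto & lang : langs)
    {
        const std::string matcher = lang + "\n";
        if (code.compare(0, matcher.size(), matcher) == 0) // startsWith
        {
            return code.substr(matcher.size());
        }
    }
    return code;
}

int main()
{
    std::cout << strip_language_tag("python\nprint('hi')\n"); // -> print('hi')
}

As in the JavaScript original, longer tags must be checked before their prefixes ("python" before "py") so the first match is the intended one.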
