
Commit 4b0b976

Merge branch 'concedo_experimental' into croco_nex_0
2 parents: 5ca4b5a + 03def28

File tree: 6 files changed, +66 -49 lines


colab.ipynb

Lines changed: 1 addition & 1 deletion
@@ -122,7 +122,7 @@
     "if TTSCommand:\n",
     " !aria2c -x 10 -o ttsmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $TTSModel\n",
     " !aria2c -x 10 -o ttswavtok.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $WavTokModel\n",
-    "!./koboldcpp_linux model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand\n"
+    "!./koboldcpp_linux model.gguf --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand\n"
     ]
   }
 ],

gpttype_adapter.cpp

Lines changed: 33 additions & 31 deletions
@@ -108,6 +108,7 @@ static kcpp_params * kcpp_data = nullptr;
 static int max_context_limit_at_load = 0;
 static int n_past = 0;
 static int debugmode = 0; //-1 = hide all, 0 = normal, 1 = showall
+static bool quiet = false;
 static std::vector<gpt_vocab::id> last_n_tokens;
 static std::vector<gpt_vocab::id> current_context_tokens;
 static size_t mem_per_token = 0;
@@ -936,12 +937,12 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float

     if(last_idx>1) //if there are 2 or more viable candidates
     {
-        if (debugmode==1) {
+        if (debugmode==1 && !quiet) {
             printf("XTC penalties [");
         }
         // then remove all other tokens above threshold EXCEPT the least likely one
         for (size_t i = 0; i < last_idx - 1; ++i) {
-            if (debugmode==1)
+            if (debugmode==1 && !quiet)
             {
                 gpt_vocab::id token = candidates->data[i].id;
                 std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
@@ -950,7 +951,7 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float
             }
             candidates->data[i].logit -= 999.0f; //infinity gets wonky results downstream, this hack works well enough
         }
-        if (debugmode==1) {
+        if (debugmode==1 && !quiet) {
             printf("]\n");
         }
         candidates->sorted = false;
@@ -1139,7 +1140,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
         max_exponent = FLOAT_MAX_LOG / std::log(penalty_base);
     }

-    if (debugmode==1 && !dry_max_token_repeat.empty()) {
+    if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
         printf("DRY penalties [");
     }
     size_t count = 0;
@@ -1150,7 +1151,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
             repeat_exp = max_exponent;
         }
         float penalty = penalty_multiplier * pow(penalty_base, repeat_exp);
-        if (debugmode==1)
+        if (debugmode==1 && !quiet)
         {
             std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
             ::utreplace(tokenizedstr, "\n", "\\n");
@@ -1163,7 +1164,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
     {
         candidates->sorted = false;
     }
-    if (debugmode==1 && !dry_max_token_repeat.empty()) {
+    if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
         printf("]\n");
     }
 }
@@ -1694,7 +1695,7 @@ static void load_grammar(const std::string & gammarstr)
         printf("\nIgnored invalid grammar sampler.");
         return;
     }
-    if(debugmode==1)
+    if(debugmode==1 && !quiet)
     {
         parsed_grammar.print(stderr);
     }
@@ -1864,7 +1865,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
     printf("Trained max context length (value:%.d).\n", n_ctx_train);
     printf("Desired context length (value:%.d).\n", n_ctx_desired);

-    if(debugmode==1)
+    if(debugmode==1 && !quiet)
     {
         printf("Solar context multiplier (value:%.3f).\n", ctx_multiplier);
         printf("Chi context train (value:%.3f).\n", chi_ctx_train_value);
@@ -1880,7 +1881,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
     {
         float extended_rope_positive_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / ((log10f(chi_ctx_value) * log10f(chi_ctx_train_value)) - (log10f(chi_ctx_value) + log10f(chi_ctx_train_value))));
         float rope_freq_base_with_positive_offset = gradient_ai_rope_freq_base_value * extended_rope_positive_offset_value;
-        // if(debugmode==1)
+        // if(debugmode==1 && !quiet)
         // {
             printf("Extended RoPE Positive Offset (multiplicator) for Solar based models. (value:%.3f).\n", extended_rope_positive_offset_value);
             printf("RoPE base calculated via Gradient AI formula for Solar based models. (value:%.1f).\n", rope_freq_base_with_positive_offset);
@@ -3034,13 +3035,13 @@ std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos)
         printf("\nWarning: KCPP text generation not initialized!\n");
         return toks;
     }
-    if(debugmode==1)
+    if(debugmode==1 && !quiet)
     {
         printf("\nFileFormat: %d, Tokenizing: %s",file_format ,input.c_str());
     }
     TokenizeString(input, toks, file_format,addbos);
     int tokcount = toks.size();
-    if(debugmode==1)
+    if(debugmode==1 && !quiet)
     {
         printf("\nTokens Counted: %d\n",tokcount);
     }
@@ -3125,6 +3126,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         llama_perf_context_reset(llama_ctx_v4);
     }

+    quiet = inputs.quiet;
     generation_finished = false; // Set current generation status
     generated_tokens.clear(); // New Generation, new tokens
     delayed_generated_tokens.clear();
@@ -3203,7 +3205,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     banned_token_ids.clear();
     if(banned_tokens.size()>0)
     {
-        // if(debugmode==1)
+        // if(debugmode==1 && !quiet)
         // {
             printf("\nBanning %zu single character sequences...",banned_tokens.size());
         // }
@@ -3220,16 +3222,18 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 }
             }
         }
-        // if(debugmode==1)
+        // if(debugmode==1 && !quiet)
         // {
             printf("\nBanned a total of %zu individual tokens.\n",banned_token_ids.size());
         // }
     }

-    // if(debugmode==1 && banned_phrases.size()>0)
     if(banned_phrases.size()>0)
     {
+        // if(debugmode==1 && !quiet)
+        // {
         printf("\nBanned a total of %zu phrases, with max token count of %d.\n",banned_phrases.size(),delayed_generated_tokens_limit);
+        // }
     }

     logit_biases.clear();
@@ -3272,7 +3276,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         //images have changed. swap identifiers to force reprocessing
         current_llava_identifier = (current_llava_identifier==LLAVA_TOKEN_IDENTIFIER_A?LLAVA_TOKEN_IDENTIFIER_B:LLAVA_TOKEN_IDENTIFIER_A);
         llava_composite_image_signature = new_llava_composite;
-        if(debugmode==1)
+        if(debugmode==1 && !quiet)
         {
             printf("\nLLAVA images changed, existing cache invalidated");
         }
@@ -3328,10 +3332,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         const int MAX_CHAR_LEN = 40;
         const int MAX_SEQ_LEN = 20;

-        // if (debugmode == 1)
-
+        // if (debugmode == 1 && !quiet)
+        {
             printf("\nProcessing %zu dry break strings...", kcpp_data->dry_sequence_breakers.size());
-
+        }
         for (auto sequence_break : kcpp_data->dry_sequence_breakers)
         {
             if (sequence_break.size() > MAX_CHAR_LEN)
@@ -3340,7 +3344,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             }
             GetOverlappingTokenSequences(sequence_break, dry_sequence_breakers, MAX_SEQ_LEN);
         }
-        if (debugmode == 1)
+        if (debugmode == 1 && !quiet)
         {
             int trivial = 0, non_trivial = 0;
             for (const auto &seq : dry_sequence_breakers)
@@ -3360,9 +3364,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     }

     bool stream_sse = inputs.stream_sse;
-
-    bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
-
+    bool allow_regular_prints = (!quiet && debugmode!=-1);

     std::string grammarstr = inputs.grammar;
     bool grammar_retain_state = inputs.grammar_retain_state;
@@ -3395,7 +3397,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     if (kcpp_data->seed <= 0 || kcpp_data->seed==0xFFFFFFFF)
     {
         kcpp_data->seed = (((uint32_t)time(NULL)) % 1000000u);
-        if(debugmode==1)
+        if(debugmode==1 && !quiet)
         {
             printf("\nUsing Seed: %d",kcpp_data->seed);
         }
@@ -3427,15 +3429,15 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
         else
         {
-            if(debugmode==1)
+            if(debugmode==1 && !quiet)
             {
                 printf("\nCreating clip image embed...");
             }
             llava_images[i].clp_image_tokens = 0;
             if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_data->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
                 printf("\nError: Clip image %d failed to create embd!",i);
             }
-            if(debugmode==1)
+            if(debugmode==1 && !quiet)
             {
                 printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
             }
@@ -3558,7 +3560,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
     n_past = 0;

-    if (debugmode==1)
+    if (debugmode==1 && !quiet)
     {
         std::string outstr = "";
         printf("\n\n[Debug: Dump Raw Input Tokens, format: %d]\n", file_format);
@@ -3703,7 +3705,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         printf("\n");
     }

-    if (debugmode==1)
+    if (debugmode==1 && !quiet)
     {
         std::string outstr = "";
         printf("\n[Debug: Dump Forwarded Input Tokens, format: %d]\n", file_format);
@@ -3757,7 +3759,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
                 draft_used = true;
                 draft_results = speculative_decoding_eval_chunk(draft_ctx, llama_ctx_v4, embd, n_vocab, n_past);
                 evalres = draft_results.draft_success;
-                if(debugmode==1)
+                if(debugmode==1 && !quiet)
                 {
                     std::string draftedtoks = get_tok_vec_str(draft_results.draftids);
                     printf("\nDrafted %d Tokens: [%s]\n",speculative_chunk_amt,draftedtoks.c_str());
@@ -4052,7 +4054,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             if(draft_used)
             {
                 int32_t draftedid = draft_results.draftids[logits_sampled];
-                if(debugmode==1)
+                if(debugmode==1 && !quiet)
                 {
                     std::string drafttok = FileFormatTokenizeID(draftedid, file_format, true);
                     std::string realtok = FileFormatTokenizeID(id, file_format, true);
@@ -4105,7 +4107,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             {
                 printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict);
             }
-            if(debugmode==1 && top_picks_history.size()>0)
+            if(debugmode==1 && !quiet && top_picks_history.size()>0)
             {
                 printf(" [");
                 bool firstloop = true;
@@ -4370,7 +4372,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         delayed_generated_tokens.pop_front();
     }

-    if(debugmode==1 && file_format == FileFormat::GGUF_GENERIC)
+    if(debugmode==1 && !quiet && file_format == FileFormat::GGUF_GENERIC)
     {
         printf("\n");
         llama_perf_context_print(llama_ctx_v4);
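
Note: the pattern applied throughout this file is uniform. A file-scope quiet flag is captured once from the per-request inputs at the top of gpttype_generate, and every debugmode==1 diagnostic is now also gated on !quiet. A minimal self-contained sketch of the same idea; generate_request and its field names are illustrative, not the koboldcpp API:

#include <cstdio>

// Mirrors the file-scope state used in gpttype_adapter.cpp.
static int  debugmode = 0;   // -1 = hide all, 0 = normal, 1 = showall
static bool quiet     = false;

// Hypothetical stand-in for koboldcpp's generation_inputs.
struct generate_request { bool quiet; };

void generate(const generate_request & inputs) {
    quiet = inputs.quiet; // captured once per request, as the diff does

    // Regular progress lines: suppressed by either quiet mode or debugmode==-1.
    bool allow_regular_prints = (!quiet && debugmode != -1);
    if (allow_regular_prints) {
        printf("\rGenerating...");
    }

    // Verbose diagnostics: require debug mode AND a non-quiet request.
    if (debugmode == 1 && !quiet) {
        printf("\n[Debug: sampler internals]\n");
    }
}

One behavioral change is worth noting: the old expression, (debugmode!=-1 && !inputs.quiet) || debugmode >= 1, let debug mode override quiet; the new expression, (!quiet && debugmode!=-1), makes quiet win in both the regular and the debug print paths.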

kcpp_adapters/AutoGuess.json

Lines changed: 11 additions & 0 deletions
@@ -109,5 +109,16 @@
     "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "assistant_end": "<|END_OF_TURN_TOKEN|>"
     }
+}, {
+    "search": ["<|User|>"],
+    "name": "DeepSeek V2.5",
+    "adapter": {
+        "system_start": "",
+        "system_end": "",
+        "user_start": "<|User|>",
+        "user_end": "",
+        "assistant_start": "<|Assistant|>",
+        "assistant_end": "<|end▁of▁sentence|>"
+    }
 }
 ]
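
Note: this entry pairs with the colab.ipynb change above. The new --chatcompletionsadapter AutoGuess flag asks the server to pick a chat template automatically, and AutoGuess.json supplies the candidates, each keyed by "search" marker strings. The selection code itself is not part of this commit; the following is only a hedged sketch of the general idea, with hypothetical type and function names, and it assumes a first-match, all-markers-present policy that the diff does not confirm:

#include <string>
#include <vector>

struct ChatAdapter {
    std::vector<std::string> search;  // marker strings, e.g. {"<|User|>"}
    std::string name;                 // e.g. "DeepSeek V2.5"
    std::string user_start, user_end;
    std::string assistant_start, assistant_end;
};

// Returns the first adapter whose markers all occur in the model's chat
// template text, or nullptr if none match (caller falls back to a default).
const ChatAdapter * auto_guess(const std::vector<ChatAdapter> & adapters,
                               const std::string & chat_template) {
    for (const auto & a : adapters) {
        bool all_found = true;
        for (const auto & marker : a.search) {
            if (chat_template.find(marker) == std::string::npos) {
                all_found = false;
                break;
            }
        }
        if (all_found) { return &a; }
    }
    return nullptr;
}

Under that reading, a model whose template contains <|User|> would be formatted with the DeepSeek V2.5 markers added here.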

koboldcpp.py

Lines changed: 5 additions & 3 deletions
@@ -66,10 +66,10 @@
 modelbusy = threading.Lock()
 requestsinqueue = 0
 defaultport = 5001
-KcppVersion = "1.83000"
+KcppVersion = "1.83001"
 LcppVersion = "b4517"
 CudaSpecifics = "Cu124_Ar6175_SMC2_DmmvX32Y1"
-ReleaseDate = "2025/01/20"
+ReleaseDate = "2025/01/22"
 showdebug = True
 guimode = False
 showsamplerwarning = True
@@ -400,7 +400,7 @@ def pick_existant_file(ntoption,nonntoption):
 (lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
 (lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
 (lib_failsafe, "Failsafe Mode (Older CPU)")]
-default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
+default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
 runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]

 def init_library():
@@ -668,6 +668,8 @@ def exit_with_error(code, message, title="Error"):
     sys.exit(code)

 def utfprint(str, importance = 2): #0 = only debugmode, 1 = except quiet, 2 = always print
+    if args.quiet and importance<2: #quiet overrides debugmode
+        return
     if args.debugmode < 1:
         if importance==1 and (args.debugmode == -1 or args.quiet):
             return

otherarch/tts_adapter.cpp

Lines changed: 9 additions & 9 deletions
@@ -154,9 +154,10 @@ static std::vector<float> embd_to_audio(
     const int n_codes,
     const int n_embd,
     const int n_thread) {
-    const int n_hop = 600;
-    const int n_fft = n_hop*4; //its 1280 at 320, or 2400 at 600
-    const int n_win = n_hop*4;
+
+    const int n_fft = 1280; //its 1280 at 320, or 2400 at 600
+    const int n_hop = 320;
+    const int n_win = 1280;
     const int n_pad = (n_win - n_hop)/2;
     const int n_out = (n_codes - 1)*n_hop + n_win;

@@ -624,7 +625,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
     {
         audio_seed = (((uint32_t)time(NULL)) % 1000000u);
     }
-    if(ttsdebugmode==1)
+    if(ttsdebugmode==1 && !inputs.quiet)
     {
         printf("\nUsing Speaker Seed: %d", speaker_seed);
         printf("\nUsing Audio Seed: %d", audio_seed);
@@ -640,13 +641,12 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
         && last_generated_audio!=""
         && last_generation_settings_prompt == std::string(inputs.prompt))
     {
-        if(ttsdebugmode==1 || !inputs.quiet)
-        {
+        if (ttsdebugmode == 1 && !inputs.quiet) {
             printf("\nReusing Cached Audio.\n");
-            output.data = last_generated_audio.c_str();
-            output.status = 1;
-            return output;
         }
+        output.data = last_generated_audio.c_str();
+        output.status = 1;
+        return output;
     }

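
Note: two things change in tts_adapter.cpp. First, embd_to_audio moves from a 600-sample hop with a 2400-point FFT/window to a 320-sample hop with a 1280-point FFT/window, the "1280 at 320" case the existing comment already anticipated. A worked example of what the constants imply for output length; the frame count and the 24 kHz vocoder rate are assumptions for illustration, not stated in this diff:

#include <cstdio>

int main() {
    // STFT geometry after this commit (values from the diff).
    const int n_hop   = 320;                           // was 600
    const int n_win   = 1280;                          // was n_hop*4 = 2400
    const int n_codes = 750;                           // illustrative frame count
    const int n_pad   = (n_win - n_hop) / 2;           // 480
    const int n_out   = (n_codes - 1) * n_hop + n_win; // formula from embd_to_audio

    // Assuming a 24 kHz sample rate, 750 frames come out to roughly 10 seconds.
    printf("n_pad=%d, n_out=%d samples (~%.2f s at 24000 Hz)\n",
           n_pad, n_out, n_out / 24000.0);
    return 0;
}

Second, the cached-audio path is a bugfix: the early return used to sit inside the ttsdebugmode==1 || !inputs.quiet block, so a quiet, non-debug request would skip the cache and synthesize the same audio again. Now the cached result is always returned, and only the "Reusing Cached Audio" log line is gated on debug mode and non-quiet requests.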