Commit 93a226d

added prefix for llava, reverted system role in template as it degraded gemma3. truncated debug logs
1 parent b314338 commit 93a226d

File tree: 3 files changed, +54 −24 lines

gpttype_adapter.cpp

Lines changed: 28 additions & 4 deletions
@@ -2796,11 +2796,12 @@ int GetThreadsToUse(bool blasmode)
 }

 //this function prepares the clip embds for llava. it's only needed when images change
-static void PrepareLlavaEmbds(const int nctx, const std::vector<int> & llava_sep)
+static void PrepareLlavaEmbds(const int nctx, const std::vector<int> & llava_sep, const std::vector<int> & llava_intro)
 {
     if(clp_ctx!=nullptr && clp_img_data!=nullptr)
     {
         int sepsize = llava_sep.size();
+        int introsize = llava_intro.size();
         last_llava_mem.clear();

         for(int i=0;i<llava_images.size();++i)
@@ -2829,6 +2830,10 @@ static void PrepareLlavaEmbds(const int nctx, const std::vector<int> & llava_sep
             if(llava_images[i].clp_image_tokens>0 && llava_images[i].clp_image_tokens < nctx)
             {
                 int tokcnt = (i==0?(llava_images[i].clp_image_tokens):(llava_images[i].clp_image_tokens+sepsize));
+                if(i==0)
+                {
+                    tokcnt += introsize;
+                }
                 for(int n=0;n<tokcnt;++n)
                 {
                     last_llava_mem.push_back(current_llava_identifier);
@@ -3144,21 +3149,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
     std::vector<int> embd_inp;
     std::vector<int> embd_inp_mem; //for storing added memory
     std::vector<int> llava_sep; //to separate between different llava images
+    std::vector<int> llava_intro; //intro text prepended before the first llava image
     bool llava_embds_built = false;

     int32_t nctx = kcpp_data->n_ctx;

     TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
     bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
     TokenizeString("\n\n", llava_sep, file_format, false);
+    TokenizeString("\nImages:\n", llava_intro, file_format, false);

     if(llava_composite_image_signature=="")
     {
         last_llava_mem.clear();
     }
     if(llava_images_changed)
     {
-        PrepareLlavaEmbds(nctx, llava_sep);
+        PrepareLlavaEmbds(nctx, llava_sep, llava_intro);
         llava_embds_built = true;
     }

@@ -3872,7 +3879,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         {
             if(!llava_embds_built) //this should never happen! however, handle it anyway
             {
-                PrepareLlavaEmbds(nctx, llava_sep);
+                PrepareLlavaEmbds(nctx, llava_sep, llava_intro);
                 llava_embds_built = true;
                 printf("\nSomehow vision embd was not prepared (maybe no fast forward), rebuilding it...\n");
             }
@@ -3888,6 +3895,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             int llavatokenscounted = 0;
             int llavatokensevaled = 0;
             int sepsize = llava_sep.size();
+            int introsize = llava_intro.size();
             while(input_consumed < embd_inp.size() && (embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_A || embd_inp[input_consumed]==LLAVA_TOKEN_IDENTIFIER_B))
             {
                 if (!last_n_tokens.empty())
@@ -3902,7 +3910,23 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
             for(int i=0;i<llava_images.size();++i)
             {
                 //note: no handling for draft_ctx as we don't support vision for it
-                if(i>0 && sepsize>0)
+                if(introsize>0 && i==0)
+                {
+                    //added at the start of everything
+                    kcpp_embd_batch batch = kcpp_embd_batch(llava_intro, n_past, use_mrope, false);
+                    auto evr = llama_decode(llama_ctx_v4, batch.batch);
+                    if(evr!=0)
+                    {
+                        printf("\nError when appending llava intro: %d\n",evr);
+                    }
+                    else
+                    {
+                        printf("\rProcessing LLaVa Intro (%d tokens)",introsize);
+                    }
+                    n_past += introsize;
+                    llavatokensevaled += introsize;
+                }
+                if(sepsize>0 && i>0)
                 {
                     //add a separator between each image
                     kcpp_embd_batch batch = kcpp_embd_batch(llava_sep, n_past, use_mrope, false);
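
In effect, the first image in a multimodal prompt now pays for the intro tokens ("\nImages:\n") on top of its own embedding tokens, while every later image pays for a "\n\n" separator. A minimal Python sketch of that accounting (illustrative only, not repo code):

# Sketch of the placeholder-token accounting introduced above (names are
# illustrative, not from the repo). The intro is charged to the first
# image only; the separator to every subsequent image.
def llava_placeholder_counts(image_token_counts, introsize, sepsize):
    counts = []
    for i, img_tokens in enumerate(image_token_counts):
        extra = introsize if i == 0 else sepsize
        counts.append(img_tokens + extra)
    return counts

# Three 256-token images with a 4-token intro and a 2-token separator:
print(llava_placeholder_counts([256, 256, 256], 4, 2))  # [260, 258, 258]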

kcpp_adapters/AutoGuess.json

Lines changed: 0 additions & 4 deletions
@@ -38,8 +38,6 @@
     "search": ["System role not supported", "<start_of_turn>"],
     "name": "Google Gemma 2.",
     "adapter": {
-      "system_start": "<start_of_turn>user\n",
-      "system_end": "<end_of_turn>\n",
       "user_start": "<start_of_turn>user\n",
       "user_end": "<end_of_turn>\n",
       "assistant_start": "<start_of_turn>model\n",
@@ -49,8 +47,6 @@
     "search": ["<start_of_image>", "<start_of_turn>", "<end_of_turn>"],
     "name": "Google Gemma 3.",
     "adapter": {
-      "system_start": "<start_of_turn>user\n",
-      "system_end": "<end_of_turn>\n",
       "user_start": "<start_of_turn>user\n",
       "user_end": "<end_of_turn>\n",
       "assistant_start": "<start_of_turn>model\n",

koboldcpp.py

Lines changed: 26 additions & 16 deletions
@@ -49,7 +49,7 @@
 dry_seq_break_max = 128

 # global vars
-KcppVersion = "1.87.3"
+KcppVersion = "1.87.4"
 showdebug = True
 kcpp_instance = None #global running instance
 global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False}
@@ -720,6 +720,22 @@ def string_contains_or_overlaps_sequence_substring(inputstr, sequences):
         return True
     return False

+def truncate_long_json(data, max_length):
+    if isinstance(data, dict):
+        new_data = {}
+        for key, value in data.items():
+            if isinstance(value, str):
+                new_data[key] = value[:max_length] + "..." if len(value) > max_length else value
+            else:
+                new_data[key] = truncate_long_json(value, max_length)
+        return new_data
+    elif isinstance(data, list):
+        return [truncate_long_json(item, max_length) for item in data]
+    elif isinstance(data, str):
+        return data[:max_length] + "..." if len(data) > max_length else data
+    else:
+        return data
+
 def get_capabilities():
     global savedata_obj, has_multiplayer, KcppVersion, friendlymodelname, friendlysdmodelname, fullsdmodelpath, mmprojpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath
     has_llm = not (friendlymodelname=="inactive")
@@ -2745,11 +2761,11 @@ def do_POST(self):
         body = None
         if contlenstr:
             content_length = int(contlenstr)
-            if content_length > (1024*1024*32): #32mb payload limit
+            if content_length > (1024*1024*48): #48mb payload limit
                 self.send_response(500)
                 self.end_headers(content_type='application/json')
                 self.wfile.write(json.dumps({"detail": {
-                    "msg": "Payload is too big. Max payload size is 32MB.",
+                    "msg": "Payload is too big. Max payload size is 48MB.",
                     "type": "bad_input",
                 }}).encode())
                 return
@@ -2765,11 +2781,11 @@ def do_POST(self):
             if line:
                 chunk_length = max(0,int(line, 16))
                 content_length += chunk_length
-            if not line or chunklimit > 512 or content_length > (1024*1024*32): #32mb payload limit
+            if not line or chunklimit > 512 or content_length > (1024*1024*48): #48mb payload limit
                 self.send_response(500)
                 self.end_headers(content_type='application/json')
                 self.wfile.write(json.dumps({"detail": {
-                    "msg": "Payload is too big. Max payload size is 32MB.",
+                    "msg": "Payload is too big. Max payload size is 48MB.",
                     "type": "bad_input",
                 }}).encode())
                 return
@@ -3178,17 +3194,11 @@ def do_POST(self):
                 }}).encode())
                 return

-
-        tmpimgs = genparams.get("images", []) # reduce amount of text printed to terminal when dumping large images
-        if tmpimgs and isinstance(tmpimgs, (list, tuple)) and len(tmpimgs)>0:
-            printablegenparams = copy.deepcopy(genparams)
-            outarr = []
-            for img in tmpimgs:
-                outarr.append(str(img[:512])+"...")
-            printablegenparams["images"] = outarr
-            utfprint("\nInput: " + json.dumps(printablegenparams),1)
-        else:
-            utfprint("\nInput: " + json.dumps(genparams),1)
+        trunc_len = 8000
+        if args.debugmode >= 1:
+            trunc_len = 16000
+        printablegenparams = truncate_long_json(genparams,trunc_len)
+        utfprint("\nInput: " + json.dumps(printablegenparams),1)

         if args.foreground:
             bring_terminal_to_foreground()
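
The old code special-cased only the "images" field; truncate_long_json instead walks the whole JSON tree, so any oversized string (base64 images, audio, long documents) gets clipped in the debug log. A quick usage check, assuming truncate_long_json from the hunk above is in scope and using a made-up payload:

import json

payload = {
    "prompt": "Describe this image.",
    "images": ["iVBORw0KGgo" * 3000],  # stand-in for a large base64 PNG
}
short = truncate_long_json(payload, 64)
print(len(short["images"][0]))   # 67: 64 kept chars plus "..."
print(json.dumps(short)[:120])   # log line stays a readable length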
