Skip to content

Commit 9e77547

Browse files
committed
Merge branch 'concedo_experimental' into crokeso
2 parents 5d4876f + a87c05f commit 9e77547

File tree

5 files changed

+166
-139
lines changed

5 files changed

+166
-139
lines changed

gpttype_adapter.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ static std::vector<int> last_media_mem; //for storing dummy tokens that will be
114114
static std::string media_composite_image_signature = ""; //for identifying when the llava images change, we need to invalidate the cache
115115
static int current_media_identifier = MEDIA_TOKEN_IDENTIFIER_A;
116116
static int vision_max_res = 2048;
117+
static bool use_mrope = false;
117118

118119
static kcpp_params * kcpp_data = nullptr;
119120
static int max_context_limit_at_load = 0;
@@ -785,7 +786,7 @@ static speculative_draft_result speculative_decoding_eval_chunk(llama_context *
785786

786787
std::vector<int> real_embd = drafted_ids;
787788
real_embd.pop_back();
788-
bool use_mrope = (file_format==FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
789+
789790
kcpp_embd_batch batch2 = kcpp_embd_batch(real_embd, actual_npast, use_mrope, true);
790791
auto draftok = (llama_decode(main_ctx, batch2.batch)==0); //actual eval for big model
791792
if(!draftok)
@@ -1893,7 +1894,6 @@ static void load_grammar(const std::string & gammarstr)
18931894

18941895
static bool kcpp_eval_image(llama_context * ctx_llama, float * img_embd, int num_img_tokens, int n_batch, int * n_past) {
18951896
int n_embd = llama_n_embd(llama_get_model(ctx_llama));
1896-
bool use_mrope = (file_format==FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
18971897

18981898
for (int i = 0; i < num_img_tokens; i += n_batch) {
18991899
int n_eval = num_img_tokens - i;
@@ -2134,6 +2134,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
21342134
guidance_ctx = nullptr;
21352135
audio_multimodal_supported = false;
21362136
vision_multimodal_supported = false;
2137+
use_mrope = false;
21372138

21382139
auto clamped_max_context_length = inputs.max_context_length;
21392140

@@ -2532,6 +2533,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
25322533
{
25332534
printf("\nMRope is used, context shift will be disabled!\n");
25342535
kcpp_data->use_contextshift = false;
2536+
use_mrope = true;
25352537
}
25362538

25372539
if(overwriteRope)
@@ -3576,8 +3578,25 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
35763578
media_object lv;
35773579
lv.b64data = item;
35783580
lv.is_audio = true;
3579-
TokenizeString("<audio>", lv.chunk_start_seq, file_format, false);
3580-
TokenizeString("</audio>\n\n", lv.chunk_end_seq, file_format, false);
3581+
std::string aud_start = "<audio>";
3582+
std::string aud_end = "</audio>\n\n";
3583+
if(clp_ctx_a)
3584+
{
3585+
int ptype = clip_get_projector_type_ext(clp_ctx_a);
3586+
if(ptype==14) //qwen omni
3587+
{
3588+
aud_start = "<|audio_bos|>";
3589+
aud_end = "<|audio_eos|>\n";
3590+
}
3591+
else if(ptype==16) //voxtral
3592+
{
3593+
aud_start = "[INST][BEGIN_AUDIO]";
3594+
aud_end = "[/INST]\n";
3595+
}
3596+
}
3597+
3598+
TokenizeString(aud_start, lv.chunk_start_seq, file_format, false);
3599+
TokenizeString(aud_end, lv.chunk_end_seq, file_format, false);
35813600
media_objects.push_back(lv);
35823601
new_media_composite += item;
35833602
}
@@ -3757,7 +3776,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
37573776
int32_t nctx = kcpp_data->n_ctx;
37583777

37593778
TokenizeString(kcpp_data->prompt, embd_inp, file_format, add_bos_token);
3760-
bool use_mrope = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2VL);
37613779
TokenizeString("\nAttached Media:\n", media_intro, file_format, false);
37623780

37633781
if(media_composite_image_signature=="")

klite.embd

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
1212
-->
1313

1414
<script id="init-config">
15-
const LITEVER = 262;
15+
const LITEVER = 266;
1616
const urlParams = new URLSearchParams(window.location.search);
1717
var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
1818
const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3620,7 +3620,7 @@ Current version indicated by LITEVER below.
36203620
websearch_retain: false,
36213621
websearch_template: "",
36223622

3623-
max_context_length: (localflag?4096:2048),
3623+
max_context_length: (localflag?4096:3072),
36243624
max_length: (localflag?512:256),
36253625
auto_ctxlen: true,
36263626
auto_genamt: true,
@@ -11788,11 +11788,6 @@ Current version indicated by LITEVER below.
1178811788
document.getElementById("max_context_length_slide").max = ep_maxctx;
1178911789
document.getElementById("max_context_length_slide_label").innerText = ep_maxctx;
1179011790
}
11791-
if(ep_maxctx && ep_maxctx>4096 && document.getElementById("max_length_slide").max<1024)
11792-
{
11793-
document.getElementById("max_length_slide").max = 1024;
11794-
document.getElementById("max_length_slide_label").innerText = 1024;
11795-
}
1179611791

1179711792
}).catch(error => {
1179811793
console.log("Failed to get KAI max ctx: " + error);
@@ -11850,17 +11845,17 @@ Current version indicated by LITEVER below.
1185011845
document.getElementById("max_context_length_slide").max = ep_maxctx;
1185111846
document.getElementById("max_context_length_slide_label").innerText = ep_maxctx;
1185211847
}
11853-
if(ep_maxctx && ep_maxctx>=4096 && document.getElementById("max_length_slide").max<1024)
11854-
{
11855-
document.getElementById("max_length_slide").max = 1024;
11856-
document.getElementById("max_length_slide_label").innerText = 1024;
11857-
}
11858-
if(ep_maxctx && ep_maxctx>=16384 && document.getElementById("max_length_slide").max<2048)
11848+
if(ep_maxctx && ep_maxctx>=8192 && document.getElementById("max_length_slide").max<2048)
1185911849
{
1186011850
document.getElementById("max_length_slide").max = 2048;
1186111851
document.getElementById("max_length_slide_label").innerText = 2048;
1186211852
}
11863-
if(localflag && localsettings.max_context_length==4096 && ep_maxctx>4096)
11853+
if(ep_maxctx && ep_maxctx>=16384 && document.getElementById("max_length_slide").max<4096)
11854+
{
11855+
document.getElementById("max_length_slide").max = 4096;
11856+
document.getElementById("max_length_slide_label").innerText = 4096;
11857+
}
11858+
if(localflag && localsettings.max_context_length==defaultsettings.max_context_length && ep_maxctx>4096)
1186411859
{
1186511860
localsettings.max_context_length = ep_maxctx;
1186611861
}
@@ -25023,7 +25018,7 @@ Current version indicated by LITEVER below.
2502325018
<div class="flex" style="margin-top: 6px;">
2502425019
<div id="actionmenuitems">
2502525020
<button type="button" class="btn btn-primary mainnav" id="btn_actmem" onclick="btn_memory()">Context</button>
25026-
<button type="button" class="btn btn-primary mainnav" id="btn_actundo" onpointerdown="btn_back_longpress_start()" onpointerleave="btn_back_longpress_end()" onpointerup="btn_back_longpress_end()" onclick="btn_back()">Back</button>
25021+
<button type="button" class="btn btn-primary mainnav" id="btn_actundo" onpointerdown="btn_back_longpress_start()" onpointerleave="btn_back_longpress_end()" onpointerup="btn_back_longpress_end()" onclick="btn_back()">Undo</button>
2502725022
<button type="button" class="btn btn-primary mainnav" id="btn_actredo" onpointerdown="btn_redo_longpress_start()" onpointerleave="btn_redo_longpress_end()" onpointerup="btn_redo_longpress_end()" onclick="btn_redo()">Redo</button>
2502825023
<button type="button" class="btn btn-primary mainnav" id="btn_actretry" onclick="btn_retry()">Retry</button>
2502925024
<button type="button" class="btn btn-primary bg_green mainnav" id="btn_addmedia" onclick="add_media_btn_menu()">Add File</button>
@@ -25073,7 +25068,7 @@ Current version indicated by LITEVER below.
2507325068
<div class="flex hidden" id="actionmenu2">
2507425069
<div id="actionmenuitems2" class="borderbox flex-push-right" style="margin-bottom: 2px;">
2507525070
<button type="button" class="btn btn-primary mainnav" id="btn_actmem2" onclick="btn_memory()">Context</button>
25076-
<button type="button" class="btn btn-primary mainnav" id="btn_actundo2" onpointerdown="btn_back_longpress_start()" onpointerleave="btn_back_longpress_end()" onpointerup="btn_back_longpress_end()" onclick="btn_back()">Back</button>
25071+
<button type="button" class="btn btn-primary mainnav" id="btn_actundo2" onpointerdown="btn_back_longpress_start()" onpointerleave="btn_back_longpress_end()" onpointerup="btn_back_longpress_end()" onclick="btn_back()">Undo</button>
2507725072
<button type="button" class="btn btn-primary mainnav" id="btn_actredo2" onpointerdown="btn_redo_longpress_start()" onpointerleave="btn_redo_longpress_end()" onpointerup="btn_redo_longpress_end()" onclick="btn_redo()">Redo</button>
2507825073
<button type="button" class="btn btn-primary mainnav" id="btn_actretry2" onclick="btn_retry()">Retry</button>
2507925074
<button type="button" class="btn btn-primary bg_green mainnav" id="btn_addmedia2" onclick="add_media_btn_menu()">Add File</button>
@@ -25597,7 +25592,7 @@ Current version indicated by LITEVER below.
2559725592

2559825593
<div class="settingitem">
2559925594
<div class="settinglabel">
25600-
<div class="justifyleft settingsmall">Context Size <span class="helpicon">?<span class="helptext">Maximum number of context tokens submitted to the AI. Must exceed max output tokens. Can be further increased by editing the textbox. Older models stop at 2048, newer ones can do 4096 or greater.</span></span></div>
25595+
<div class="justifyleft settingsmall">Context Size <span class="helpicon">?<span class="helptext">Maximum number of context tokens submitted to the AI. Must exceed max output tokens. Can be further increased by editing the textbox. This value determines how much history text the AI remembers.</span></span></div>
2560125596
<input title="Context Size" inputmode="numeric" class="justifyright flex-push-right settingsmall widerinput" id="max_context_length" oninput="
2560225597
document.getElementById('max_context_length_slide').value = this.value;">
2560325598
</div>
@@ -25616,15 +25611,15 @@ Current version indicated by LITEVER below.
2561625611
<div class="settingitem">
2561725612
<div class="settinglabel">
2561825613
<div class="justifyleft settingsmall">Max Output <span class="helpicon">?<span
25619-
class="helptext">Number of tokens the AI should generate. Higher numbers will take longer to generate.</span></span></div>
25614+
class="helptext">Maximum number of tokens the AI should generate. Higher numbers will take longer to generate. AI can stop before the limit.</span></span></div>
2562025615
<input title="Max Output" inputmode="numeric" class="justifyright flex-push-right settingsmall" id="max_length" oninput="
2562125616
document.getElementById('max_length_slide').value = this.value;">
2562225617
</div>
2562325618
<div><input title="Max Output Slider" type="range" min="32" max="16384" step="32" id="max_length_slide" oninput="
25624-
document.getElementById('max_length').value = this.value;"></div>
25619+
document.getElementById('max_length').value = this.value;"></div>
2562525620
<div class="settingminmax">
2562625621
<div class="justifyleft">16</div>
25627-
<div class="justifyright" id="max_length_slide_label">8192</div>
25622+
<div class="justifyright" id="max_length_slide_label">1024</div>
2562825623
</div>
2562925624
<div id="auto_genamt_panel" class="settinglabel">
2563025625
<div class="justifyleft settingsmall" title="Automatically lowers settings if incompatible with existing workers">Auto-Adjust Limits </div>

0 commit comments

Comments
 (0)