
Commit 7a1dac6

Author: prima
Merge remote-tracking branch 'origin/concedo_experimental' into remoteManagement
2 parents: 0812fc5 + 75ec0ba

2 files changed: +74 -54 lines


klite.embd

Lines changed: 30 additions & 28 deletions
@@ -4223,7 +4223,7 @@ Current version indicated by LITEVER below.
 function get_instruct_starttag(doTrim=true)
 {
     let instag = localsettings.instruct_starttag;
-    if(instag=="{{[INPUT]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()) && localsettings.placeholder_tags)
+    if(instag=="{{[INPUT]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()))
     {
         instag = "\n### Instruction:\n"; //backend not compatible with auto
     }
@@ -4236,7 +4236,7 @@ Current version indicated by LITEVER below.
 function get_instruct_endtag(doTrim=true)
 {
     let instag = localsettings.instruct_endtag;
-    if(instag=="{{[OUTPUT]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()) && localsettings.placeholder_tags)
+    if(instag=="{{[OUTPUT]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()))
     {
         instag = "\n### Response:\n"; //backend not compatible with auto
     }
@@ -4249,7 +4249,7 @@ Current version indicated by LITEVER below.
 function get_instruct_systag(doTrim=true)
 {
     let instag = localsettings.instruct_systag;
-    if(instag=="{{[SYSTEM]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()) && localsettings.placeholder_tags)
+    if(instag=="{{[SYSTEM]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()))
     {
         instag = ""; //backend not compatible with auto
     }
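Taken together, the three hunks above converge on one rule: {{[INPUT]}}, {{[OUTPUT]}}, and {{[SYSTEM]}} now fall back to concrete local tags whenever the backend is not a KoboldCpp endpoint with autotag support, no longer gated on localsettings.placeholder_tags. A minimal Python sketch of that rule (the resolve_tag helper and its signature are illustrative, not from the codebase):

def resolve_tag(tag, native_autotags, fallback):
    # native_autotags: True when the backend is a KoboldCpp endpoint that understands the placeholders
    placeholders = ("{{[INPUT]}}", "{{[OUTPUT]}}", "{{[SYSTEM]}}")
    if tag in placeholders and not native_autotags:
        return fallback  # backend not compatible with auto -> substitute a concrete tag
    return tag

# Fallbacks mirrored from the diff: Alpaca headers for input/output, empty system tag.
assert resolve_tag("{{[INPUT]}}", False, "\n### Instruction:\n") == "\n### Instruction:\n"
assert resolve_tag("{{[INPUT]}}", True, "\n### Instruction:\n") == "{{[INPUT]}}"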
@@ -16607,7 +16607,7 @@ Current version indicated by LITEVER below.
 gentxt = trim_extra_stop_seqs(gentxt,false);
 
 //fix alpaca leakage
-if(localsettings.fix_alpaca_leak && (localsettings.opmode == 2 || localsettings.opmode == 3 || localsettings.opmode == 4) && get_instruct_starttag(true).toLowerCase().includes("### instruction"))
+if(localsettings.fix_alpaca_leak && (localsettings.opmode == 2 || localsettings.opmode == 3 || localsettings.opmode == 4) && (get_instruct_starttag(true)=="{{[INPUT]}}" || get_instruct_starttag(true).toLowerCase().includes("### instruction")))
 {
     let matches = gentxt.match(/\n### (instruction|response)\n|\n### ([^\s]+?):\n/gi);
     for(let m in matches)
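This hunk widens the leak-scrub guard so it also fires while the start tag is still the raw {{[INPUT]}} placeholder (i.e. before local substitution). For illustration only, the same leak pattern ported to Python; the shipped code is the JavaScript regex above:

import re

# Port of /\n### (instruction|response)\n|\n### ([^\s]+?):\n/gi
leak_pattern = re.compile(r"\n### (instruction|response)\n|\n### (\S+?):\n", re.IGNORECASE)

sample = "The answer is 42.\n### Instruction:\nPlease continue."
print(bool(leak_pattern.search(sample)))  # True -> an Alpaca header leaked into the output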
@@ -16749,42 +16749,44 @@ Current version indicated by LITEVER below.
 {
     let st = get_instruct_starttag(true);
     let et = get_instruct_endtag(true);
-    let stet_et = "";
-    if(localsettings.separate_end_tags && get_instruct_endtag_end(true))
+    let stripping_arr = [];
+    if(st!="")
     {
-        stet_et = get_instruct_endtag_end(true);
+        stripping_arr.push(st);
     }
-
-    //sometimes the OAI type endpoints get confused and repeat the instruct tag, so trim it
-    let earlymatch = gentxt.indexOf(et);
-    if(earlymatch==0)
+    if(et!="")
     {
-        gentxt = gentxt.substring(et.length);
+        stripping_arr.push(et);
     }
-
-    let found = gentxt.indexOf(st);
-    let splitresponse = [];
-    if (found != -1) //if found, truncate to it
+    if(st=="{{[INPUT]}}" || et=="{{[OUTPUT]}}")
     {
-        splitresponse = gentxt.split(st);
-        gentxt = splitresponse[0];
+        stripping_arr.push("### Instruction:");
+        stripping_arr.push("### Response:");
     }
-
-    found = gentxt.indexOf(et);
-    splitresponse = [];
-    if (found != -1) //if found, truncate to it
+    if(localsettings.separate_end_tags && get_instruct_endtag_end(true))
     {
-        splitresponse = gentxt.split(et);
-        gentxt = splitresponse[0];
+        let stet_et = get_instruct_endtag_end(true);
+        if(stet_et!="")
+        {
+            stripping_arr.push(stet_et);
+        }
     }
 
-    if(stet_et && stet_et!="")
+    //sometimes the OAI type endpoints get confused and repeat the instruct tag, so trim it
+    for(let i=0;i<stripping_arr.length;++i)
     {
-        found = gentxt.indexOf(stet_et);
-        splitresponse = [];
+        let curtag = stripping_arr[i];
+        let earlymatch = gentxt.indexOf(curtag);
+        if(earlymatch==0)
+        {
+            gentxt = gentxt.substring(curtag.length);
+        }
+
+        let found = gentxt.indexOf(curtag);
+        let splitresponse = [];
         if (found != -1) //if found, truncate to it
         {
-            splitresponse = gentxt.split(stet_et);
+            splitresponse = gentxt.split(curtag);
             gentxt = splitresponse[0];
         }
     }
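The rewrite collapses four near-identical strip/truncate passes into one loop over a stripping_arr of known tags, and additionally scrubs the literal Alpaca headers whenever the placeholder tags are still in play. A behavioral sketch in Python (assumed-equivalent logic for illustration, not the shipped JavaScript):

def strip_tags(gentxt, stripping_arr):
    for curtag in stripping_arr:
        if gentxt.startswith(curtag):   # tag echoed at the very start: drop it
            gentxt = gentxt[len(curtag):]
        found = gentxt.find(curtag)     # any later occurrence: truncate there
        if found != -1:
            gentxt = gentxt[:found]
    return gentxt

print(strip_tags("### Response:\nHello!\n### Instruction:\nmore",
                 ["### Instruction:", "### Response:"]))  # -> "\nHello!\n"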

koboldcpp.py

Lines changed: 44 additions & 26 deletions
@@ -1292,8 +1292,7 @@ def generate(genparams, stream_flag=False):
     xtc_probability = tryparsefloat(genparams.get('xtc_probability', 0),0)
     sampler_order = genparams.get('sampler_order', [6, 0, 1, 3, 4, 2, 5])
     seed = tryparseint(genparams.get('sampler_seed', -1),-1)
-    stop_sequence = (genparams.get('stop_sequence', []) if genparams.get('stop_sequence', []) is not None else [])
-    stop_sequence = stop_sequence[:stop_token_max]
+    stop_sequence = genparams.get('stop_sequence', [])
     ban_eos_token = genparams.get('ban_eos_token', False)
     stream_sse = stream_flag
     grammar = genparams.get('grammar', '')
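The two removed lines are not lost: the None-guard and the stop_token_max truncation move into transform_genparams (see the third hunk below). A tiny sketch of what the guard did, with an assumed cap value for illustration:

stop_token_max = 24  # hypothetical value; the real cap is a constant defined in koboldcpp.py
genparams = {"stop_sequence": None}  # clients may send an explicit null
stop_sequence = (genparams.get('stop_sequence', []) if genparams.get('stop_sequence', []) is not None else [])
stop_sequence = stop_sequence[:stop_token_max]
print(stop_sequence)  # [] -- null coerced to an empty list, then capped at stop_token_max entries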
@@ -1317,25 +1316,6 @@ def generate(genparams, stream_flag=False):
     banned_tokens = genparams.get('banned_tokens', banned_strings)
     bypass_eos_token = genparams.get('bypass_eos', False)
     custom_token_bans = genparams.get('custom_token_bans', '')
-    replace_instruct_placeholders = genparams.get('replace_instruct_placeholders', False)
-    if replace_instruct_placeholders:
-        adapter_obj = {} if chatcompl_adapter is None else chatcompl_adapter
-        system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
-        user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
-        user_message_end = adapter_obj.get("user_end", "")
-        assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
-        assistant_message_end = adapter_obj.get("assistant_end", "")
-        prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
-        prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
-        prompt = prompt.replace("{{[SYSTEM]}}", system_message_start)
-        memory = memory.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
-        memory = memory.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
-        memory = memory.replace("{{[SYSTEM]}}", system_message_start)
-        for i in range(len(stop_sequence)):
-            if stop_sequence[i] == "{{[INPUT]}}":
-                stop_sequence[i] = user_message_start
-            elif stop_sequence[i] == "{{[OUTPUT]}}":
-                stop_sequence[i] = assistant_message_start
 
     for tok in custom_token_bans.split(','):
         tok = tok.strip() # Remove leading/trailing whitespace
@@ -2505,6 +2485,34 @@ def transform_genparams(genparams, api_format):
         genparams["ollamasysprompt"] = ollamasysprompt
         genparams["ollamabodyprompt"] = ollamabodyprompt
         genparams["prompt"] = ollamasysprompt + ollamabodyprompt
+
+    #final transformations (universal template replace)
+    replace_instruct_placeholders = genparams.get('replace_instruct_placeholders', False)
+    stop_sequence = (genparams.get('stop_sequence', []) if genparams.get('stop_sequence', []) is not None else [])
+    stop_sequence = stop_sequence[:stop_token_max]
+    if replace_instruct_placeholders:
+        prompt = genparams.get('prompt', "")
+        memory = genparams.get('memory', "")
+        adapter_obj = {} if chatcompl_adapter is None else chatcompl_adapter
+        system_message_start = adapter_obj.get("system_start", "\n### Instruction:\n")
+        user_message_start = adapter_obj.get("user_start", "\n### Instruction:\n")
+        user_message_end = adapter_obj.get("user_end", "")
+        assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
+        assistant_message_end = adapter_obj.get("assistant_end", "")
+        prompt = prompt.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
+        prompt = prompt.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
+        prompt = prompt.replace("{{[SYSTEM]}}", system_message_start)
+        memory = memory.replace("{{[INPUT]}}", assistant_message_end + user_message_start)
+        memory = memory.replace("{{[OUTPUT]}}", user_message_end + assistant_message_start)
+        memory = memory.replace("{{[SYSTEM]}}", system_message_start)
+        for i in range(len(stop_sequence)):
+            if stop_sequence[i] == "{{[INPUT]}}":
+                stop_sequence[i] = user_message_start
+            elif stop_sequence[i] == "{{[OUTPUT]}}":
+                stop_sequence[i] = assistant_message_start
+        genparams["prompt"] = prompt
+        genparams["memory"] = memory
+    genparams["stop_sequence"] = stop_sequence
     return genparams
 
 def LaunchWebbrowser(target_url, failedmsg):
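The relocated block now runs once, for every API format, as a final pass over prompt, memory, and stop_sequence. A self-contained illustration of the substitution it performs (the adapter values below are made up for the example, not koboldcpp defaults):

adapter_obj = {"user_start": "<|user|>", "user_end": "", "assistant_start": "<|assistant|>", "assistant_end": ""}
prompt = "{{[INPUT]}}Hello there{{[OUTPUT]}}"
prompt = prompt.replace("{{[INPUT]}}", adapter_obj["assistant_end"] + adapter_obj["user_start"])
prompt = prompt.replace("{{[OUTPUT]}}", adapter_obj["user_end"] + adapter_obj["assistant_start"])
print(prompt)  # -> <|user|>Hello there<|assistant|>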
@@ -4881,18 +4889,28 @@ def fetch_search_quants(a,b,c):
 
     def fetch_search_models():
         from tkinter import messagebox
-        nonlocal searchbox1, modelsearch1_var, modelsearch2_var
+        nonlocal searchbox1, searchbox2, modelsearch1_var, modelsearch2_var
         try:
            modelsearch1_var.set("")
            modelsearch2_var.set("")
+            searchbox1.configure(values=[])
+            searchbox2.configure(values=[])
             searchedmodels = []
-            search = "GGUF " + model_search.get()
-            urlcode = urlparse.urlencode({"search":search,"limit":10}, doseq=True)
+            searchbase = model_search.get()
+            if searchbase.strip()=="":
+                return
+            urlcode = urlparse.urlencode({"search":( "GGUF " + searchbase),"limit":10}, doseq=True)
+            urlcode2 = urlparse.urlencode({"search":searchbase,"limit":6}, doseq=True)
             resp = make_url_request(f"https://huggingface.co/api/models?{urlcode}",None,'GET',{},10)
-            if len(resp)==0:
-                messagebox.showinfo("No Results Found", "Search found no results")
             for m in resp:
                 searchedmodels.append(m["id"])
+            if len(resp)<=3: #too few results, repeat search without GGUF in the string
+                resp2 = make_url_request(f"https://huggingface.co/api/models?{urlcode2}",None,'GET',{},10)
+                for m in resp2:
+                    searchedmodels.append(m["id"])
+
+            if len(searchedmodels)==0:
+                messagebox.showinfo("No Results Found", "Search found no results")
             searchbox1.configure(values=searchedmodels)
             if len(searchedmodels)>0:
                 modelsearch1_var.set(searchedmodels[0])
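In effect, the search now makes a two-pass query against the HuggingFace model API: a "GGUF "-prefixed search first, then an unprefixed retry when the first pass returns 3 results or fewer. A standalone sketch of the same flow using only the standard library (search_models is an illustrative helper, not koboldcpp's make_url_request plumbing):

import json
import urllib.parse
import urllib.request

def search_models(searchbase):
    if searchbase.strip() == "":
        return []
    def query(params):
        url = "https://huggingface.co/api/models?" + urllib.parse.urlencode(params, doseq=True)
        with urllib.request.urlopen(url, timeout=10) as r:
            return json.load(r)
    # First pass: bias results toward GGUF-format repos.
    resp = query({"search": "GGUF " + searchbase, "limit": 10})
    models = [m["id"] for m in resp]
    if len(resp) <= 3:  # too few results, repeat the search without the GGUF prefix
        models += [m["id"] for m in query({"search": searchbase, "limit": 6})]
    return models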
