Skip to content

Commit 25fc19a

Browse files
committed
Merge branch 'concedo_experimental' into crokeso
2 parents e55c1df + b686f4b commit 25fc19a

File tree

6 files changed

+43
-23
lines changed

6 files changed

+43
-23
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
226226
//#else
227227
// GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
228228
//#endif // GGML_CUDA_FORCE_CUBLAS
229-
GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\nJust a moment, Please Be Patient...\n---\n");
229+
GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\n---\n");
230230
GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
231231
for (int id = 0; id < info.device_count; ++id) {
232232
int device_vmm = 0;

gpttype_adapter.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,8 +2081,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
20812081
{
20822082
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
20832083
} else {
2084-
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 16.\n");
2085-
kcpp_data->n_ubatch = 16;
2084+
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 8.\n");
2085+
kcpp_data->n_ubatch = 8;
20862086
}
20872087
}
20882088
#endif
@@ -2679,6 +2679,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
26792679
add_bos_token = false;
26802680
}
26812681
}
2682+
printf("Starting model warm up, please wait a moment...\n");
26822683

26832684
//warmup at least 33 tokens to trigger batch
26842685
std::vector<int> tmp;

kcpp_adapters/AutoGuess.json

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,25 @@
1414
"search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
1515
"name": "ChatML (Qwen 2.5 based).",
1616
"adapter": {
17-
"system_start": "<|im_start|>system\n\n",
18-
"system_end": "<|im_end|>\n\n",
19-
"user_start": "<|im_start|>user\n\n",
20-
"user_end": "<|im_end|>\n\n",
21-
"assistant_start": "<|im_start|>assistant\n\n",
22-
"assistant_end": "<|im_end|>\n\n",
17+
"system_start": "<|im_start|>system\n",
18+
"system_end": "<|im_end|>\n",
19+
"user_start": "<|im_start|>user\n",
20+
"user_end": "<|im_end|>\n",
21+
"assistant_start": "<|im_start|>assistant\n",
22+
"assistant_end": "<|im_end|>\n",
2323
"tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n",
2424
"tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
2525
}
2626
}, {
2727
"search": ["<|im_start|>assistant", "<|im_end|>"],
2828
"name": "ChatML (Generic).",
2929
"adapter": {
30-
"system_start": "<|im_start|>system\n\n",
31-
"system_end": "<|im_end|>\n\n",
32-
"user_start": "<|im_start|>user\n\n",
33-
"user_end": "<|im_end|>\n\n",
34-
"assistant_start": "<|im_start|>assistant\n\n",
35-
"assistant_end": "<|im_end|>\n\n"
30+
"system_start": "<|im_start|>system\n",
31+
"system_end": "<|im_end|>\n",
32+
"user_start": "<|im_start|>user\n",
33+
"user_end": "<|im_end|>\n",
34+
"assistant_start": "<|im_start|>assistant\n",
35+
"assistant_end": "<|im_end|>\n"
3636
}
3737
}, {
3838
"search": ["System role not supported", "<start_of_turn>"],
@@ -61,11 +61,11 @@
6161
"name": "Llama 3.x.",
6262
"adapter": {
6363
"system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
64-
"system_end": "<|eot_id|>\n\n",
64+
"system_end": "<|eot_id|>",
6565
"user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
66-
"user_end": "<|eot_id|>\n\n",
66+
"user_end": "<|eot_id|>",
6767
"assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
68-
"assistant_end": "<|eot_id|>\n\n"
68+
"assistant_end": "<|eot_id|>"
6969
}
7070
}, {
7171
"search": ["<|header_start|>assistant<|header_end|>"],

klite.embd

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13874,6 +13874,7 @@ Current version indicated by LITEVER below.
1387413874
{
1387513875
document.getElementById("xtts_container").classList.add("hidden");
1387613876
document.getElementById("oai_tts_container").classList.add("hidden");
13877+
document.getElementById("pollinations_tts_container").classList.add("hidden");
1387713878
document.getElementById("alltalk_specific_controls").classList.add("hidden");
1387813879
document.getElementById("kcpp_tts_container").classList.add("hidden");
1387913880

@@ -13891,6 +13892,9 @@ Current version indicated by LITEVER below.
1389113892
}
1389213893
else if(selectedTTS == OAI_TTS_ID) {
1389313894
document.getElementById("oai_tts_container").classList.remove("hidden");
13895+
}else if(selectedTTS == POLLINATIONS_TTS_ID)
13896+
{
13897+
document.getElementById("pollinations_tts_container").classList.remove("hidden");
1389413898
}
1389513899
else if(selectedTTS == KCPP_TTS_ID) {
1389613900
document.getElementById("kcpp_tts_container").classList.remove("hidden");
@@ -14115,7 +14119,7 @@ Current version indicated by LITEVER below.
1411514119
{
1411614120
const pollinations_params = new URLSearchParams({
1411714121
model:"openai-audio",
14118-
voice:"nova",
14122+
voice:document.getElementById("pollinations_voices").value,
1411914123
private: true,
1412014124
referrer: "koboldai"
1412114125
});
@@ -22921,6 +22925,21 @@ Current version indicated by LITEVER below.
2292122925
</tr><tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>TTS Voice </td><td><input class="settinglabel miniinput" type="text" value="alloy" id="oai_tts_voice" style="margin-left:3px; height:18px; width: 55px; padding: 2px;"></td></tr>
2292222926
</table>
2292322927
</div>
22928+
<div id="pollinations_tts_container" class="settinglabel hidden">
22929+
<table width="100%"><tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>Voice:</td><td>
22930+
<select class="form-control" id="pollinations_voices" style="font-size:12px;height:20px;padding:0;margin:0px 0 0;">
22931+
<option value="alloy">alloy</option>
22932+
<option value="ash">ash</option>
22933+
<option value="ballad">ballad</option>
22934+
<option value="coral">coral</option>
22935+
<option value="echo">echo</option>
22936+
<option value="fable">fable</option>
22937+
<option value="nova" selected>nova</option>
22938+
<option value="onyx">onyx</option>
22939+
<option value="sage">sage</option>
22940+
<option value="shimmer">shimmer</option>
22941+
</select></td></tr></table>
22942+
</div>
2292422943
<div id="kcpp_tts_container" class="hidden">
2292522944
<div class="color_red hidden" id="nokcpptts">KoboldCpp TTS Unavailable</div>
2292622945
<div class="settinglabel">
@@ -22991,7 +23010,7 @@ Current version indicated by LITEVER below.
2299123010
<input title="Negative Prompt" style="width:calc(100% - 110px);" type="text" placeholder="Default Negative Prompt. Put &quot;none&quot; to skip" value="" id="negpromptinput">
2299223011
</div>
2299323012
<div class="inlinelabel">
22994-
<div class="justifyleft rowitem">Number of Steps: </div>
23013+
<div class="justifyleft rowitem">Step Count: </div>
2299523014
<input title="Number of Steps" type="text" inputmode="decimal" id="img_steps" style="width:60px">
2299623015
</div>
2299723016
<div class="inlinelabel">

src/llama-kv-cache.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ bool llama_kv_cache_unified::update(llama_context & lctx) {
373373
printf("\nWARNING: The current KV cache / model configuration does not support K-shift");
374374
} else {
375375

376-
LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);
376+
//LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);
377377

378378
// apply K-shift if needed
379379
if (hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {

tools/quantclip.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include "ggml.h"
22
#include "common.h"
3-
#include "clip.h"
4-
#include "llava.h"
3+
#include "mtmd/clip.h"
4+
#include "mtmd/llava.h"
55
#include "llama.h"
66

77
#include "base64.hpp"

0 commit comments

Comments (0)