Skip to content

Commit 25fc19a

Browse files
committed
Merge branch 'concedo_experimental' into crokeso
2 parents e55c1df + b686f4b commit 25fc19a

File tree

6 files changed

+43
-23
lines changed

6 files changed

+43
-23
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
226226
//#else
227227
// GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
228228
//#endif // GGML_CUDA_FORCE_CUBLAS
229-
GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\nJust a moment, Please Be Patient...\n---\n");
229+
GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\n---\n");
230230
GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
231231
for (int id = 0; id < info.device_count; ++id) {
232232
int device_vmm = 0;

gpttype_adapter.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,8 +2081,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
20812081
{
20822082
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
20832083
} else {
2084-
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 16.\n");
2085-
kcpp_data->n_ubatch = 16;
2084+
printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 8.\n");
2085+
kcpp_data->n_ubatch = 8;
20862086
}
20872087
}
20882088
#endif
@@ -2679,6 +2679,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
26792679
add_bos_token = false;
26802680
}
26812681
}
2682+
printf("Starting model warm up, please wait a moment...\n");
26822683

26832684
//warmup at least 33 tokens to trigger batch
26842685
std::vector<int> tmp;

kcpp_adapters/AutoGuess.json

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,25 @@
1414
"search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
1515
"name": "ChatML (Qwen 2.5 based).",
1616
"adapter": {
17-
"system_start": "<|im_start|>system\n\n",
18-
"system_end": "<|im_end|>\n\n",
19-
"user_start": "<|im_start|>user\n\n",
20-
"user_end": "<|im_end|>\n\n",
21-
"assistant_start": "<|im_start|>assistant\n\n",
22-
"assistant_end": "<|im_end|>\n\n",
17+
"system_start": "<|im_start|>system\n",
18+
"system_end": "<|im_end|>\n",
19+
"user_start": "<|im_start|>user\n",
20+
"user_end": "<|im_end|>\n",
21+
"assistant_start": "<|im_start|>assistant\n",
22+
"assistant_end": "<|im_end|>\n",
2323
"tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n",
2424
"tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
2525
}
2626
}, {
2727
"search": ["<|im_start|>assistant", "<|im_end|>"],
2828
"name": "ChatML (Generic).",
2929
"adapter": {
30-
"system_start": "<|im_start|>system\n\n",
31-
"system_end": "<|im_end|>\n\n",
32-
"user_start": "<|im_start|>user\n\n",
33-
"user_end": "<|im_end|>\n\n",
34-
"assistant_start": "<|im_start|>assistant\n\n",
35-
"assistant_end": "<|im_end|>\n\n"
30+
"system_start": "<|im_start|>system\n",
31+
"system_end": "<|im_end|>\n",
32+
"user_start": "<|im_start|>user\n",
33+
"user_end": "<|im_end|>\n",
34+
"assistant_start": "<|im_start|>assistant\n",
35+
"assistant_end": "<|im_end|>\n"
3636
}
3737
}, {
3838
"search": ["System role not supported", "<start_of_turn>"],
@@ -61,11 +61,11 @@
6161
"name": "Llama 3.x.",
6262
"adapter": {
6363
"system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
64-
"system_end": "<|eot_id|>\n\n",
64+
"system_end": "<|eot_id|>",
6565
"user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
66-
"user_end": "<|eot_id|>\n\n",
66+
"user_end": "<|eot_id|>",
6767
"assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
68-
"assistant_end": "<|eot_id|>\n\n"
68+
"assistant_end": "<|eot_id|>"
6969
}
7070
}, {
7171
"search": ["<|header_start|>assistant<|header_end|>"],

klite.embd

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13874,6 +13874,7 @@ Current version indicated by LITEVER below.
1387413874
{
1387513875
document.getElementById("xtts_container").classList.add("hidden");
1387613876
document.getElementById("oai_tts_container").classList.add("hidden");
13877+
document.getElementById("pollinations_tts_container").classList.add("hidden");
1387713878
document.getElementById("alltalk_specific_controls").classList.add("hidden");
1387813879
document.getElementById("kcpp_tts_container").classList.add("hidden");
1387913880

@@ -13891,6 +13892,9 @@ Current version indicated by LITEVER below.
1389113892
}
1389213893
else if(selectedTTS == OAI_TTS_ID) {
1389313894
document.getElementById("oai_tts_container").classList.remove("hidden");
13895+
}else if(selectedTTS == POLLINATIONS_TTS_ID)
13896+
{
13897+
document.getElementById("pollinations_tts_container").classList.remove("hidden");
1389413898
}
1389513899
else if(selectedTTS == KCPP_TTS_ID) {
1389613900
document.getElementById("kcpp_tts_container").classList.remove("hidden");
@@ -14115,7 +14119,7 @@ Current version indicated by LITEVER below.
1411514119
{
1411614120
const pollinations_params = new URLSearchParams({
1411714121
model:"openai-audio",
14118-
voice:"nova",
14122+
voice:document.getElementById("pollinations_voices").value,
1411914123
private: true,
1412014124
referrer: "koboldai"
1412114125
});
@@ -22921,6 +22925,21 @@ Current version indicated by LITEVER below.
2292122925
</tr><tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>TTS Voice </td><td><input class="settinglabel miniinput" type="text" value="alloy" id="oai_tts_voice" style="margin-left:3px; height:18px; width: 55px; padding: 2px;"></td></tr>
2292222926
</table>
2292322927
</div>
22928+
<div id="pollinations_tts_container" class="settinglabel hidden">
22929+
<table width="100%"><tr style="font-size:12px;padding:2px;margin:0px 0 0;"><td>Voice:</td><td>
22930+
<select class="form-control" id="pollinations_voices" style="font-size:12px;height:20px;padding:0;margin:0px 0 0;">
22931+
<option value="alloy">alloy</option>
22932+
<option value="ash">ash</option>
22933+
<option value="ballad">ballad</option>
22934+
<option value="coral">coral</option>
22935+
<option value="echo">echo</option>
22936+
<option value="fable">fable</option>
22937+
<option value="nova" selected>nova</option>
22938+
<option value="onyx">onyx</option>
22939+
<option value="sage">sage</option>
22940+
<option value="shimmer">shimmer</option>
22941+
</select></td></tr></table>
22942+
</div>
2292422943
<div id="kcpp_tts_container" class="hidden">
2292522944
<div class="color_red hidden" id="nokcpptts">KoboldCpp TTS Unavailable</div>
2292622945
<div class="settinglabel">
@@ -22991,7 +23010,7 @@ Current version indicated by LITEVER below.
2299123010
<input title="Negative Prompt" style="width:calc(100% - 110px);" type="text" placeholder="Default Negative Prompt. Put &quot;none&quot; to skip" value="" id="negpromptinput">
2299223011
</div>
2299323012
<div class="inlinelabel">
22994-
<div class="justifyleft rowitem">Number of Steps: </div>
23013+
<div class="justifyleft rowitem">Step Count: </div>
2299523014
<input title="Number of Steps" type="text" inputmode="decimal" id="img_steps" style="width:60px">
2299623015
</div>
2299723016
<div class="inlinelabel">

src/llama-kv-cache.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ bool llama_kv_cache_unified::update(llama_context & lctx) {
373373
printf("\nWARNING: The current KV cache / model configuration does not support K-shift");
374374
} else {
375375

376-
LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);
376+
//LLAMA_LOG_DEBUG("%s: applying K-shift\n", __func__);
377377

378378
// apply K-shift if needed
379379
if (hparams.rope_type != LLAMA_ROPE_TYPE_NONE) {

tools/quantclip.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include "ggml.h"
22
#include "common.h"
3-
#include "clip.h"
4-
#include "llava.h"
3+
#include "mtmd/clip.h"
4+
#include "mtmd/llava.h"
55
#include "llama.h"
66

77
#include "base64.hpp"

0 commit comments

Comments (0)