
Commit 6a03fe5

Merge pull request #21 from esolithe/concedo_experimental
Concedo experimental
2 parents da1b983 + ce7aa0d commit 6a03fe5

File tree

14 files changed: +715 additions, -1115 deletions

Makefile

Lines changed: 3 additions & 0 deletions
@@ -79,6 +79,9 @@ SIMPLERCFLAGS =
 FULLCFLAGS =
 NONECFLAGS =
 
+# prefer bundled glslc
+LLAMA_USE_BUNDLED_GLSLC := 1
+
 CLBLAST_FLAGS = -DGGML_USE_CLBLAST
 FAILSAFE_FLAGS = -DUSE_FAILSAFE
 VULKAN_FLAGS = -DGGML_USE_VULKAN -DSD_USE_VULKAN

convert_hf_to_gguf.py

Lines changed: 8 additions & 1 deletion
@@ -1082,7 +1082,14 @@ def _set_vocab_rwkv_world(self):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
-        special_vocab.chat_template = "rwkv-world"
+        if special_vocab.chat_template is None:
+            template_path = Path(__file__).parent / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
+            if template_path.is_file():
+                with open(template_path, "r", encoding="utf-8") as f:
+                    template = f.read()
+            else:
+                template = "rwkv-world"
+            special_vocab.chat_template = template
         # hack: Add '\n\n' as the EOT token to make it chat normally
         special_vocab._set_special_token("eot", 261)
         # hack: Override these as they have already been set (incorrectly)
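The new logic keeps any chat template the model already ships, prefers the bundled llama-cpp-rwkv-world.jinja file when none is set, and only falls back to the bare "rwkv-world" identifier if that file is missing. A standalone sketch of the same resolution order (pick_rwkv_chat_template is an illustrative helper, not part of the converter):

from pathlib import Path

def pick_rwkv_chat_template(existing: str | None, repo_root: Path) -> str:
    # A template already present in the model metadata wins.
    if existing is not None:
        return existing
    # Otherwise embed the full bundled Jinja template.
    template_path = repo_root / "models" / "templates" / "llama-cpp-rwkv-world.jinja"
    if template_path.is_file():
        return template_path.read_text(encoding="utf-8")
    # Last resort: the short identifier previously hardcoded here.
    return "rwkv-world"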

ggml/src/ggml-cpu/llamafile/sgemm.cpp

Lines changed: 343 additions & 1094 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/set-rows.cu

Lines changed: 7 additions & 1 deletion
@@ -3,7 +3,10 @@
 typedef void (*set_rows_kernel_t)(const char * src, char * dst);
 
 template<typename src_t, typename dst_t>
-__device__ void set_rows_1(const src_t * src_f, dst_t * dst_f) {}
+__device__ void set_rows_1(const src_t * src_f, dst_t * dst_f) {
+    GGML_UNUSED(src_f);
+    GGML_UNUSED(dst_f);
+}
 
 template<>
 __device__ __forceinline__ void set_rows_1<float, half>(const float * src_f, half * dst_h) {
@@ -53,6 +56,9 @@ static __global__ void k_set_rows(
         const src_t* src_elem = src0_row + i00;
         dst_t* dst_elem = dst_row_ptr + i00;
         set_rows_1(src_elem, dst_elem);
+
+        GGML_UNUSED(ne10);
+        GGML_UNUSED(ne13);
 }
 
 template<typename src_t, typename dst_t>

kcpp_adapters/AutoGuess.json

Lines changed: 33 additions & 0 deletions
@@ -177,6 +177,39 @@
         "assistant_start": "<|Assistant|>",
         "assistant_end": "<|end▁of▁sentence|>"
     }
+}, {
+    "search": ["<|bom|>","is_last_checked_defined"],
+    "name": "Jamba",
+    "adapter": {
+        "system_start": "<|bom|><|system|>",
+        "system_end": "<|eom|>",
+        "user_start": "<|bom|><|user|>",
+        "user_end": "<|eom|>",
+        "assistant_start": "<|bom|><|assistant|>",
+        "assistant_end": "<|eom|>"
+    }
+}, {
+    "search": ["<|im_start|>assistant<|im_middle|>", "<|im_assistant|>assistant<|im_middle|>", "<|im_end|>"],
+    "name": "ChatML (Kimi).",
+    "adapter": {
+        "system_start": "<|im_start|>system<|im_middle|>",
+        "system_end": "<|im_end|>",
+        "user_start": "<|im_start|>user<|im_middle|>",
+        "user_end": "<|im_end|>",
+        "assistant_start": "<|im_start|>assistant<|im_middle|>",
+        "assistant_end": "<|im_end|>"
+    }
+}, {
+    "search": ["<|userprompt|>", "<|endofuserprompt|>", "<|response|>", "<|endofresponse|>"],
+    "name": "Dots",
+    "adapter": {
+        "system_start": "<|system|>\n",
+        "system_end": "<|endofsystem|>\n",
+        "user_start": "<|userprompt|>\n",
+        "user_end": "<|endofuserprompt|>\n",
+        "assistant_start": "<|response|>\n",
+        "assistant_end": "<|endofresponse|>\n"
+    }
 }, {
     "search": ["rwkv-world"],
     "name": "RWKV World",

klite.embd

Lines changed: 29 additions & 11 deletions
@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->
 
 <script id="init-config">
-const LITEVER = 261;
+const LITEVER = 262;
 const urlParams = new URLSearchParams(window.location.search);
 var localflag = urlParams.get('local'); //this will be replaced automatically in embedded kcpp
 const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";
@@ -3605,10 +3605,12 @@ Current version indicated by LITEVER below.
 entersubmit: true, //enter sends the prompt
 darkmode: true,
 render_streaming_markdown: true,
-raw_instruct_tags: false, //experimental flag
+
+raw_instruct_tags: false, //experimental flags
 show_endpoint_selector: false,
 no_warn_unsaved: false,
 no_compress_audio: false,
+autoguess_third_party:false,
 
 //section migrated from story itself
 extrastopseq: "",
@@ -4622,7 +4624,10 @@ Current version indicated by LITEVER below.
 let instag = localsettings.instruct_starttag;
 if(instag=="{{[INPUT]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()))
 {
-    instag = "\n### Instruction:\n"; //backend not compatible with auto
+    if(!localsettings.autoguess_third_party)
+    {
+        instag = "\n### Instruction:\n"; //backend not compatible with auto
+    }
 }
 if(doTrim){
     return replaceAll(instag, "\\n", "\n").trim();
@@ -4635,7 +4640,10 @@ Current version indicated by LITEVER below.
 let instag = localsettings.instruct_endtag;
 if(instag=="{{[OUTPUT]}}" && !(custom_kobold_endpoint != "" && is_using_kcpp_with_autotags()))
 {
-    instag = "\n### Response:\n"; //backend not compatible with auto
+    if(!localsettings.autoguess_third_party)
+    {
+        instag = "\n### Response:\n"; //backend not compatible with auto
+    }
 }
 if(doTrim){
     return replaceAll(instag, "\\n", "\n").trim();
@@ -13009,6 +13017,7 @@ Current version indicated by LITEVER below.
 document.getElementById("show_endpoint_selector").checked = localsettings.show_endpoint_selector;
 document.getElementById("no_warn_unsaved").checked = localsettings.no_warn_unsaved;
 document.getElementById("no_compress_audio").checked = localsettings.no_compress_audio;
+document.getElementById("autoguess_third_party").checked = localsettings.autoguess_third_party;
 document.getElementById("render_streaming_markdown").checked = localsettings.render_streaming_markdown;
 document.getElementById("min_p").value = localsettings.min_p;
 document.getElementById("dynatemp_range").value = localsettings.dynatemp_range;
@@ -13559,6 +13568,7 @@ Current version indicated by LITEVER below.
 localsettings.show_endpoint_selector = (document.getElementById("show_endpoint_selector").checked ? true : false);
 localsettings.no_warn_unsaved = (document.getElementById("no_warn_unsaved").checked ? true : false);
 localsettings.no_compress_audio = (document.getElementById("no_compress_audio").checked ? true : false);
+localsettings.autoguess_third_party = (document.getElementById("autoguess_third_party").checked ? true : false);
 localsettings.render_streaming_markdown = (document.getElementById("render_streaming_markdown").checked ? true : false);
 if(document.getElementById("opmode").value==1)
 {
@@ -14290,9 +14300,9 @@ Current version indicated by LITEVER below.
 let userinput = getInputBoxValue();
 if(userinput.trim()!="")
 {
-    let str = get_instructendplaceholder() + userinput.trim();
+    let str = get_instructstartplaceholder() + userinput.trim();
     if (localsettings.separate_end_tags) {
-        str += get_instructendplaceholder_end();
+        str += get_instructstartplaceholder_end();
     }
     document.getElementById("memorytext").value += str;
 }
@@ -18938,12 +18948,15 @@ Current version indicated by LITEVER below.
     gentxt = gentxt.substring(curtag.length);
 }
 
-let found = gentxt.indexOf(curtag);
-let splitresponse = [];
-if (found != -1) //if found, truncate to it
+if(localsettings.includedefaultstops)
 {
-    splitresponse = gentxt.split(curtag);
-    gentxt = splitresponse[0];
+    let found = gentxt.indexOf(curtag);
+    let splitresponse = [];
+    if (found != -1) //if found, truncate to it
+    {
+        splitresponse = gentxt.split(curtag);
+        gentxt = splitresponse[0];
+    }
 }
 }
 
@@ -26043,6 +26056,11 @@ Current version indicated by LITEVER below.
     class="helptext">Do not compress embedded audio files. Might crash on big files! (caution!)</span></span></div>
 <input title="Do Not Recompress Audio" type="checkbox" id="no_compress_audio" style="margin:0px 0px 0px 0px;">
 </div>
+<div class="settinglabel">
+    <div class="justifyleft settingsmall">AutoguessTagsForThirdParty <span class="helpicon">?<span
+    class="helptext">Sends raw KoboldCppAutomatic AutoGuess tags to third party APIs. (e.g. Horde) Be warned, you better hope they handle them well internally...</span></span></div>
+    <input title="Send Autoguess Tags For Third Party APIs" type="checkbox" id="autoguess_third_party" style="margin:0px 0px 0px 0px;">
+</div>
 </div>
 
 <div class="settingitem wide">

koboldcpp.py

Lines changed: 26 additions & 3 deletions
@@ -1127,7 +1127,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
     if fsize > (10*1024*1024): #dont bother with models < 10mb
         cs = ctxsize
         mem = gpumem
-        if "-00001-of-0000" in fname:
+        if "-00001-of-00" in fname:
             match = re.search(r'-(\d{5})-of-(\d{5})\.', fname)
             if match:
                 total_parts = int(match.group(2))
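
The shortened substring matters: the old check "-00001-of-0000" only matched files split into at most nine parts (totals 00001 through 00009), while "-00001-of-00" accepts any five-digit total, which the follow-up regex then parses. A quick standalone demonstration:

import re

for fname in ("model-00001-of-00009.gguf", "model-00001-of-00024.gguf"):
    print("-00001-of-0000" in fname)  # old check: True, then False
    print("-00001-of-00" in fname)    # new check: True for both
    match = re.search(r'-(\d{5})-of-(\d{5})\.', fname)
    if match:
        print(int(match.group(2)))    # total parts: 9, then 24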
@@ -2916,6 +2916,15 @@ def transform_genparams(genparams, api_format):
             # In case of any issues, just do normal gen
             print("Structured Output not valid - discarded")
             pass
+    elif 'json_schema' in genparams:
+        try:
+            schema = genparams.get('json_schema')
+            decoded = convert_json_to_gbnf(schema)
+            if decoded:
+                genparams["grammar"] = decoded
+        except Exception:
+            print("Structured Output (old format) not valid - discarded")
+            pass
 
     message_index = 0
     for message in messages_array:
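
This branch restores the older request shape where the schema sits directly under a top-level json_schema key, converting it to a GBNF grammar the same way the structured-output path above does. A hedged example of such a request payload (field values illustrative):

genparams = {
    "prompt": "Name one fruit as JSON.",
    "max_length": 64,
    # old-style structured output: schema at the top level
    "json_schema": {
        "type": "object",
        "properties": {"name": {"type": "string"}},
        "required": ["name"],
    },
}
# After transform_genparams runs, genparams["grammar"] holds the
# GBNF produced by convert_json_to_gbnf and constrains sampling.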
@@ -3639,6 +3648,18 @@ async def handle_sse_stream(self, genparams, api_format):
                     tokenStr = tokenStr[:sindex]
 
                 if tokenStr!="" or streamDone:
+                    need_split_final_msg = True if (currfinishreason is not None and streamDone and tokenStr!="") else False
+                    if need_split_final_msg: #we need to send one message without the finish reason, then send a finish reason with no msg to follow standards
+                        if api_format == 4: # if oai chat, set format to expected openai streaming response
+                            event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":int(time.time()),"model":friendlymodelname,"choices":[{"index":0,"finish_reason":None,"delta":{'role':'assistant','content':tokenStr}}]})
+                            await self.send_oai_sse_event(event_str)
+                        elif api_format == 3: # non chat completions
+                            event_str = json.dumps({"id":"koboldcpp","object":"text_completion","created":int(time.time()),"model":friendlymodelname,"choices":[{"index":0,"finish_reason":None,"text":tokenStr}]})
+                            await self.send_oai_sse_event(event_str)
+                        else:
+                            event_str = json.dumps({"token": tokenStr, "finish_reason":None})
+                            await self.send_kai_sse_event(event_str)
+                        tokenStr = "" # now the final finish reason can be sent alone
                     if api_format == 4: # if oai chat, set format to expected openai streaming response
                         event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":int(time.time()),"model":friendlymodelname,"choices":[{"index":0,"finish_reason":currfinishreason,"delta":{'role':'assistant','content':tokenStr}}]})
                         await self.send_oai_sse_event(event_str)
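
The effect: a stream whose last chunk carries both text and a finish reason is now split into two events, first the remaining text with finish_reason null, then an empty delta carrying only the finish reason, matching how OpenAI-compatible clients expect streams to end. For api_format == 4 the tail of the stream looks roughly like this (abridged wire output; "stop" is one possible finish reason):

data: {"object":"chat.completion.chunk","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant","content":"world!"}}]}
data: {"object":"chat.completion.chunk","choices":[{"index":0,"finish_reason":"stop","delta":{"role":"assistant","content":""}}]}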
@@ -6766,7 +6787,7 @@ def save_config_gui():
 def load_config_gui(): #this is used to populate the GUI with a config file, whereas load_config_cli simply overwrites cli args
     file_type = [("KoboldCpp Settings", "*.kcpps *.kcppt")]
     global runmode_untouched, zenity_permitted
-    filename = zentk_askopenfilename(filetypes=file_type, defaultextension=".kcppt", initialdir=None)
+    filename = zentk_askopenfilename(filetypes=file_type, defaultextension=".kcppt", initialdir=None, title="Select kcpps or kcppt settings config file")
     if not filename or filename=="":
         return
     if not os.path.exists(filename) or os.path.getsize(filename)<4 or os.path.getsize(filename)>50000000: #for sanity, check invaid kcpps
@@ -7177,6 +7198,7 @@ def tunnel_reader():
 def reload_from_new_args(newargs):
     try:
         args.istemplate = False
+        newargs = convert_invalid_args(newargs)
         for key, value in newargs.items(): #do not overwrite certain values
             if key not in ["remotetunnel","showgui","port","host","port_param","admin","adminpassword","admindir","admintextmodelsdir","admindatadir","adminallowhf","ssl","nocertify","benchmark","prompt","config"]:
                 setattr(args, key, value)
@@ -7202,6 +7224,7 @@ def load_config_cli(filename):
     print("Loading .kcpps configuration file...")
     with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
         config = json.load(f)
+    config = convert_invalid_args(config)
     if "onready" in config:
         config["onready"] = "" #do not allow onready commands from config
     args.istemplate = False
@@ -7358,7 +7381,7 @@ def download_model_from_url(url, permitted_types=[".gguf",".safetensors", ".ggml
             break
     if ((url.startswith("http://") or url.startswith("https://")) and end_ext_ok):
         dlfile = downloader_internal(url, "auto", False, min_file_size)
-        if handle_multipart and "-00001-of-0000" in url: #handle multipart files up to 9 parts
+        if handle_multipart and "-00001-of-00" in url: #handle multipart files up to 9 parts
            match = re.search(r'-(\d{5})-of-(\d{5})\.', url)
            if match:
                total_parts = int(match.group(2))

models/templates/llama-cpp-rwkv-world.jinja

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+{%- if not add_generation_prompt is defined -%}
+{%- set add_generation_prompt = true -%}
+{%- endif -%}
+{%- set ns = namespace(system_prompt='') -%}
+{%- for message in messages -%}
+{%- if message['role'] == 'system' -%}
+{%- set ns.system_prompt = message['content'] -%}
+{%- endif -%}
+{%- endfor -%}
+{{bos_token}}
+{%- if ns.system_prompt != '' -%}
+{{- 'System: ' + ns.system_prompt + '\n\n' -}}
+{%- endif -%}
+{%- for message in messages -%}
+{%- if message['role'] == 'user' -%}
+{{- 'User: ' + message['content']|trim + '\n\n' -}}
+{%- endif -%}
+{%- if message['role'] == 'assistant' and message['content'] is not none -%}
+{%- set content = message['content'] -%}
+{%- if '</think>' in content -%}
+{%- set content = content.split('</think>')[-1] -%}
+{%- endif -%}
+{{- 'Assistant: ' + content|trim + '\n\n' -}}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+{{- 'Assistant:' -}}
+{%- if enable_thinking is defined and enable_thinking is false %}
+{{- ' <think>\n</think>' }}
+{%- endif %}
+{%- if enable_thinking is defined and enable_thinking is true %}
+{{- ' <think>' }}
+{%- endif %}
+{%- endif -%}
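
This template flattens a conversation into plain System:/User:/Assistant: turns, drops any </think> reasoning from earlier assistant replies, and appends "Assistant:" when a generation prompt is requested. A quick render sketch using the jinja2 package (empty bos_token chosen for illustration):

from jinja2 import Template

with open("llama-cpp-rwkv-world.jinja", encoding="utf-8") as f:
    src = f.read()
print(Template(src).render(
    bos_token="",
    messages=[
        {"role": "system", "content": "Be concise."},
        {"role": "user", "content": "Hello!"},
    ],
))
# System: Be concise.
#
# User: Hello!
#
# Assistant: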

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+datasets~=3.2.0
+matplotlib~=3.10.0
+numpy~=1.26.4
+requests~=2.32.3
+tqdm~=4.67.1
