Skip to content

Commit 23ec550

Browse files
authored
PoC: add chat template heuristics (LostRuins#1283)
* PoC: add chat template heuristics The fallback chat template adapter of Vicuna is not ideal in some cases (e.g. a test against a sub-portion of the BBC news classification task on Kaggle gave an 82% accuracy with Vicuna and 88% with the official ChatML format for a q4_k_m Qwen 2.5 3B-Instruct gguf). This PR adds a proof of concept simple heuristic which looks at the chat template and upgrades the adapter when it is able to. * gemma 2 heuristic * Phi 4, Llama 3.x heuristics * better qwen vs generic heuristic * cleanup * mistral (generic) heuristic * fix sys msg for mistral * phi 3.5 * mistral v3 * cohere (aya expanse 32b based) * only derive from chat template if AutoGuess * add notes about alpaca fallbacks * added AutoGuess.json dummy * add mistral v7 * switch to using a json list with search strings
1 parent 3e6ef8e commit 23ec550

File tree

2 files changed

+136
-0
lines changed

2 files changed

+136
-0
lines changed

kcpp_adapters/AutoGuess.json

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
[
    {
        "search": ["<|im_start|>assistant", "<|im_end|>", "<|im_sep|>"],
        "name": "ChatML (Phi 4)",
        "adapter": {
            "system_start": "<|im_start|>system<|im_sep|>",
            "system_end": "<|im_end|>",
            "user_start": "<|im_start|>user<|im_sep|>",
            "user_end": "<|im_end|>",
            "assistant_start": "<|im_start|>assistant<|im_sep|>",
            "assistant_end": "<|im_end|>"
        }
    }, {
        "search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
        "name": "ChatML (Qwen 2.5 based).",
        "adapter": {
            "system_start": "<|im_start|>system\n\n",
            "system_end": "<|im_end|>\n\n",
            "user_start": "<|im_start|>user\n\n",
            "user_end": "<|im_end|>\n\n",
            "assistant_start": "<|im_start|>assistant\n\n",
            "assistant_end": "<|im_end|>\n\n",
            "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n",
            "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
        }
    }, {
        "search": ["<|im_start|>assistant", "<|im_end|>"],
        "name": "ChatML (Generic).",
        "adapter": {
            "system_start": "<|im_start|>system\n\n",
            "system_end": "<|im_end|>\n\n",
            "user_start": "<|im_start|>user\n\n",
            "user_end": "<|im_end|>\n\n",
            "assistant_start": "<|im_start|>assistant\n\n",
            "assistant_end": "<|im_end|>\n\n"
        }
    }, {
        "search": ["System role not supported", "<start_of_turn>"],
        "name": "Google Gemma 2.",
        "adapter": {
            "user_start": "<start_of_turn>user\n",
            "user_end": "<end_of_turn>\n",
            "assistant_start": "<start_of_turn>model\n",
            "assistant_end": "<end_of_turn>\n"
        }
    }, {
        "search": ["<|start_header_id|>system"],
        "name": "Llama 3.x.",
        "adapter": {
            "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
            "system_end": "<|eot_id|>\n\n",
            "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
            "user_end": "<|eot_id|>\n\n",
            "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
            "assistant_end": "<|eot_id|>\n\n"
        }
    }, {
        "search": ["[/INST]", "[SYSTEM_PROMPT]"],
        "name": "Mistral V7 (with system prompt)",
        "adapter": {
            "system_start": "[SYSTEM_PROMPT] ",
            "system_end": "[/SYSTEM_PROMPT]",
            "user_start": "[INST] ",
            "user_end": "[/INST]",
            "assistant_start": " ",
            "assistant_end": "</s>"
        }
    }, {
        "search": ["[/INST]", "\"[INST] \" + system_message"],
        "name": "Mistral V3",
        "adapter": {
            "system_start": "[INST] ",
            "system_end": "[/INST] ",
            "user_start": "[INST] ",
            "user_end": "[/INST] ",
            "assistant_start": "",
            "assistant_end": "</s>"
        }
    }, {
        "search": ["[/INST]"],
        "name": "Mistral (Generic)",
        "adapter": {
            "system_start": "[INST]",
            "system_end": "[/INST]\n",
            "user_start": "[INST]",
            "user_end": "[/INST]\n",
            "assistant_start": "",
            "assistant_end": "</s>"
        }
    }, {
        "search": ["<|system|>", "<|user|>"],
        "name": "Phi 3.5",
        "adapter": {
            "system_start": "<|system|>\n",
            "system_end": "<|end|>\n",
            "user_start": "<|user|>\n",
            "user_end": "<|end|>\n",
            "assistant_start": "<|assistant|>\n",
            "assistant_end": "<|end|>\n"
        }
    }, {
        "search": ["<|START_OF_TURN_TOKEN|>"],
        "name": "Cohere (Aya Expanse 32B based)",
        "adapter": {
            "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
            "system_end": "<|END_OF_TURN_TOKEN|>",
            "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
            "user_end": "<|END_OF_TURN_TOKEN|>",
            "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
            "assistant_end": "<|END_OF_TURN_TOKEN|>"
        }
    }
]

koboldcpp.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4648,6 +4648,29 @@ def main(launch_args,start_server=True):
46484648
exitcounter = 999
46494649
exit_with_error(3,"Could not load text model: " + modelname)
46504650

if (
    chatcompl_adapter is not None
    and isinstance(chatcompl_adapter, list)
):
    # The chat completions adapter is a list that needs derivation from chat templates
    # Try to derive chat completions adapter from chat template, now that we have the model loaded
    ctbytes = handle.get_chat_template()
    # get_chat_template returns a C string pointer; decode defensively since GGUF
    # metadata is not guaranteed to be valid UTF-8.
    chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")
    candidates = chatcompl_adapter
    # Reset to None so a failed match falls through to the Alpaca default below.
    chatcompl_adapter = None
    if chat_template != "":
        # First candidate whose search strings ALL appear in the template wins,
        # so more specific entries must come before generic ones in the list.
        for entry in candidates:
            if all(s in chat_template for s in entry['search']):
                print(f"Chat completion heuristic: {entry['name']}")
                chatcompl_adapter = entry['adapter']
                break
    if chatcompl_adapter is None:
        print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.")

if chatcompl_adapter is None and not args.chatcompletionsadapter:
    print("Note: Alpaca format will be used for OpenAI Compatible API chat completions. Use --chatcompletionsadapter=AutoGuess to use chat template heuristics.")
46514674
#handle loading image model
46524675
if args.sdmodel and args.sdmodel!="":
46534676
imgmodel = args.sdmodel

0 commit comments

Comments
 (0)