30 changes: 30 additions & 0 deletions .github/workflows/test-autoguess.yaml
@@ -0,0 +1,30 @@
name: AutoGuess Tests

on:
  pull_request:
    branches:
      - concedo_experimental
    paths:
      - 'kcpp_adapters/AutoGuess.json'

jobs:
  test-autoguess:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x' # Adjust to your preferred Python version

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests
          git clone https://github.com/kallewoof/gated-tokenizers.git tests/gated-tokenizers

      - name: Run AutoGuess tests
        run: python tests/test_autoguess.py
1 change: 1 addition & 0 deletions .gitignore
@@ -142,6 +142,7 @@ rocblas.dll
hipblas.dll
koboldcpp_hipblas.so
koboldcpp_hipblas.dll
+ .tokenizer_configs

bin/
conda/
20 changes: 8 additions & 12 deletions kcpp_adapters/AutoGuess.json
@@ -12,7 +12,7 @@
}
}, {
"search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
"name": "ChatML (Qwen 2.5 based).",
"name": "ChatML (Qwen 2.5 based)",
"adapter": {
"system_start": "<|im_start|>system\n",
"system_end": "<|im_end|>\n",
@@ -25,7 +25,7 @@
}
}, {
"search": ["<|im_user|>user<|im_middle|>", "<|im_assistant|>assistant<|im_middle|>", "<|im_end|>"],
"name": "ChatML (Kimi).",
"name": "ChatML (Kimi)",
"adapter": {
"system_start": "<|im_system|>system<|im_middle|>",
"system_end": "<|im_end|>",
@@ -36,7 +36,7 @@
}
}, {
"search": ["System role not supported", "<start_of_turn>"],
"name": "Google Gemma 2.",
"name": "Google Gemma 2",
"adapter": {
"system_start": "<start_of_turn>user\n",
"system_end": "<end_of_turn>\n",
@@ -47,7 +47,7 @@
}
}, {
"search": ["<start_of_image>", "<start_of_turn>", "<end_of_turn>"],
"name": "Google Gemma 3.",
"name": "Google Gemma 3",
"adapter": {
"system_start": "<start_of_turn>user\n",
"system_end": "<end_of_turn>\n",
@@ -58,7 +58,7 @@
}
}, {
"search": ["<image_soft_token>", "<start_of_turn>model", "<end_of_turn>"],
"name": "Google Gemma 3n.",
"name": "Google Gemma 3n",
"adapter": {
"system_start": "<start_of_turn>user\n",
"system_end": "<end_of_turn>\n",
@@ -69,7 +69,7 @@
}
},{
"search": ["<|start_header_id|>assistant<|end_header_id|>"],
"name": "Llama 3.x.",
"name": "Llama 3.x",
"adapter": {
"system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
"system_end": "<|eot_id|>",
@@ -199,11 +199,7 @@
"assistant_start": "ASSISTANT: ",
"assistant_end": "</s>\n"
}
- },
-
-
-
- {
+ }, {
"search": ["[/INST]"],
"name": "Mistral (Generic)",
"adapter": {
@@ -216,7 +216,7 @@
}
}, {
"search": ["<|im_start|>assistant", "<|im_end|>"],
"name": "ChatML (Generic).",
"name": "ChatML (Generic)",
"adapter": {
"system_start": "<|im_start|>system\n",
"system_end": "<|im_end|>\n",
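For orientation, each AutoGuess.json entry pairs a list of search strings, all of which must occur in a model's chat template, with a named prompt adapter. A sketch of the full shape as a Python literal follows; only the system_* values of this entry are visible in the hunks above, so the user_* and assistant_* values here are assumptions by analogy:

# Illustrative shape of one AutoGuess.json entry (not an exact copy of the file).
entry = {
    "search": ["<|im_start|>assistant", "<|im_end|>"],  # all substrings must match
    "name": "ChatML (Generic)",
    "adapter": {
        "system_start": "<|im_start|>system\n",        # shown in the diff
        "system_end": "<|im_end|>\n",                  # shown in the diff
        "user_start": "<|im_start|>user\n",            # assumed by analogy
        "user_end": "<|im_end|>\n",                    # assumed by analogy
        "assistant_start": "<|im_start|>assistant\n",  # assumed by analogy
        "assistant_end": "<|im_end|>\n",               # assumed by analogy
    },
}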
106 changes: 106 additions & 0 deletions tests/test_autoguess.py
@@ -0,0 +1,106 @@
"""
Test that the AutoGuess feature picks the correct model for every template.
Also checks that every template is being tested so that when new AutoGuess additions are made, this test fails unless an accompanying test is included.
"""
import os
import sys
import requests
import json


# Map an AutoGuess name to a HuggingFace model ID
# THIS LIST MUST BE UPDATED WHEN A NEW MODEL IS ADDED
AUTOGUESS_MAPPING = {
"ChatML (Phi 4)": "microsoft/phi-4",
"ChatML (Qwen 2.5 based)": "Qwen/Qwen2.5-0.5B-Instruct",
"ChatML (Kimi)": "moonshotai/Kimi-K2-Instruct",
"Google Gemma 2": "Efficient-Large-Model/gemma-2-2b-it",
"Google Gemma 3": "scb10x/typhoon2.1-gemma3-12b",
"Google Gemma 3n": "lmstudio-community/gemma-3n-E4B-it-MLX-bf16",
"Llama 3.x": "Steelskull/L3.3-Shakudo-70b",
"Llama 4": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
"Mistral V7 (with system prompt)": "Doctor-Shotgun/MS3.2-24B-Magnum-Diamond",
"Mistral V3": "mistralai/Mistral-7B-Instruct-v0.3",
"GLM-4": "THUDM/glm-4-9b-chat-hf",
"Phi 3.5": "microsoft/Phi-3.5-mini-instruct",
"Phi 4 (mini)": "microsoft/Phi-4-mini-instruct",
"Cohere (Aya Expanse 32B based)": "CohereLabs/aya-expanse-32b",
"DeepSeek V2.5": "deepseek-ai/DeepSeek-V2.5",
"Jamba": "ai21labs/Jamba-tiny-dev",
"Dots": "rednote-hilab/dots.llm1.inst",
"RWKV World": "fla-hub/rwkv7-1.5B-world",
"Mistral (Generic)": "mistralai/Mistral-Nemo-Instruct-2407",
"ChatML (Generic)": "NewEden/Gemma-27B-chatml",
}

# User may be running this test from ./ or from ../ -- we want to be in ./ (i.e. tests)
if os.path.exists("tests"):
os.chdir("tests")

with open("../kcpp_adapters/AutoGuess.json") as f:
autoguess = json.load(f)

def get_tokenizer_config_for_huggingface_model_id(huggingface_model_id: str):
    # Prefer the locally cloned gated-tokenizers cache (populated by the CI workflow)
    fname = f"gated-tokenizers/tokenizer_configs/{huggingface_model_id.replace('/','_')}.json"
    if os.path.exists(fname):
        with open(fname) as f:
            return json.load(f)

    # Fall back to fetching the config straight from the HuggingFace Hub
    for filename in ["tokenizer_config.json", "chat_template.json"]:
        url = f"https://huggingface.co/{huggingface_model_id}/resolve/main/{filename}"
        response = requests.get(url)
        if response.status_code == 200:
            v = json.loads(response.text)
            if 'chat_template' in v:
                return v
    raise ValueError(f"Failed to fetch tokenizer config for {huggingface_model_id}.")

def match_chat_template_to_adapter(chat_template: str|list) -> tuple[str, str|None]|None:
    # Extra logic in the tester that is not in the application: support for
    # multiple chat templates, preferring the "default" one when present
    sub_template: str|None = None
    if isinstance(chat_template, list):
        found = False
        for template in chat_template:
            # entries look like {"name": ..., "template": ...}
            if template['name'] == "default":
                sub_template = "default"
                chat_template = template['template']
                found = True
                break
        if not found:
            # Pick the first template if no default is present
            sub_template = chat_template[0]['name']
            chat_template = chat_template[0]['template']
    if chat_template != "":
        for entry in autoguess:
            if all(s in chat_template for s in entry['search']):
                return entry['name'], sub_template
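As a hedged illustration of the matcher: a template containing both generic ChatML markers should fall through to the ChatML (Generic) entry shown above, provided no earlier entry's search strings all occur in it (entries are checked in file order):

# Illustrative only; the exact entry matched depends on the order of
# entries in AutoGuess.json, parts of which are collapsed in this diff.
demo = "<|im_start|>assistant ... <|im_end|>"
print(match_chat_template_to_adapter(demo))   # e.g. ('ChatML (Generic)', None)

# The list form resolves the "default" sub-template when present:
multi = [{"name": "default", "template": demo}]
print(match_chat_template_to_adapter(multi))  # e.g. ('ChatML (Generic)', 'default')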

failures = 0
seen = set()
namefmt = "{name:<" + str(max(len(name) for name in AUTOGUESS_MAPPING.keys())) + "}"
hmifmt = "{huggingface_model_id:<" + str(max(len(huggingface_model_id) for huggingface_model_id in AUTOGUESS_MAPPING.values())) + "}"
for name, huggingface_model_id in AUTOGUESS_MAPPING.items():
    seen.add(name)
    if huggingface_model_id == "***UNKNOWN***":
        print(namefmt.format(name=name) + " = " + namefmt.format(name="***UNKNOWN***") + " : PENDING")
        continue
    tokenizer_config = get_tokenizer_config_for_huggingface_model_id(huggingface_model_id)
    assert 'chat_template' in tokenizer_config
    matched = match_chat_template_to_adapter(tokenizer_config['chat_template'])
    if matched is None:
        matched, sub_template = "MISSING MAPPING", None
    else:
        matched, sub_template = matched
    sub_template = f"[{sub_template}]" if sub_template else ""
    print(namefmt.format(name=name) + " = " + namefmt.format(name=matched) + " : " + ("OK " if name == matched else "FAILURE") + " " + hmifmt.format(huggingface_model_id=huggingface_model_id) + " " + sub_template)
    failures += name != matched

for entry in autoguess:
    if entry['name'] not in seen:
        print(namefmt.format(name=entry['name']) + " MISSING MAPPING")
        failures += 1

if failures > 0:
    print(f"There were {failures} failure(s)!")
    sys.exit(1)
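To run the suite locally, mirror the workflow above: pip install requests, clone https://github.com/kallewoof/gated-tokenizers.git into tests/gated-tokenizers, then run python tests/test_autoguess.py (from the repository root or from tests/; the script adjusts its working directory either way).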