Skip to content

Commit 362ca55

Browse files
author
kerem seyhan
committed
Changed the GGUF references to the Q8_0 version to keep it consistent, prefixed the model names with the organization names, added Qwen/WorldPM.
1 parent 7b1c65e commit 362ca55

File tree

1 file changed

+16
-34
lines changed

1 file changed

+16
-34
lines changed

models.json

Lines changed: 16 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
"models": [
33
{
4-
"name": "Qwen3",
4+
"name": "Qwen/Qwen3",
55
"profiles": [
66
{
77
"name": "thinking",
8-
"command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q3_K_M.gguf --jinja --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --ctx-size 40960 --predict 32768",
8+
"command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q8_0.gguf --jinja --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --ctx-size 40960 --predict 32768",
99
"references": [
1010
"https://qwen.readthedocs.io/en/latest/run_locally/llama.cpp.html#llama-cli",
1111
"https://huggingface.co/Qwen/Qwen3-235B-A22B#switching-between-thinking-and-non-thinking-mode"
@@ -16,7 +16,7 @@
1616
},
1717
{
1818
"name": "non thinking",
19-
"command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q3_K_M.gguf --jinja --temp 0.7 --top-k 20 --top-p 0.8 --min-p 0 --ctx-size 40960 --predict 32768",
19+
"command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q8_0.gguf --jinja --temp 0.7 --top-k 20 --top-p 0.8 --min-p 0 --ctx-size 40960 --predict 32768",
2020
"references": [
2121
"https://qwen.readthedocs.io/en/latest/run_locally/llama.cpp.html#llama-cli",
2222
"https://huggingface.co/Qwen/Qwen3-235B-A22B#switching-between-thinking-and-non-thinking-mode"
@@ -28,11 +28,11 @@
2828
]
2929
},
3030
{
31-
"name": "QwQ",
31+
"name": "Qwen/QwQ",
3232
"profiles": [
3333
{
3434
"name": "chat",
35-
"command": "./llama-cli --model unsloth-QwQ-32B-GGUF/QwQ-32B-Q4_K_M.gguf --ctx-size 32768 --temp 0.6 --min-p 0.01 --top-k 40 --top-p 0.95",
35+
"command": "./llama-cli --model unsloth-QwQ-32B-GGUF/QwQ-32B-Q8_0.gguf --ctx-size 32768 --temp 0.6 --min-p 0.01 --top-k 40 --top-p 0.95",
3636
"references": [
3737
"https://modelscope.cn/models/Qwen/QwQ-32B",
3838
"https://huggingface.co/Qwen/QwQ-32B",
@@ -46,7 +46,7 @@
4646
]
4747
},
4848
{
49-
"name": "Llama-4",
49+
"name": "meta-llama/Llama-4",
5050
"profiles": [
5151
{
5252
"name": "chat",
@@ -62,11 +62,11 @@
6262
]
6363
},
6464
{
65-
"name": "Gemma3",
65+
"name": "google/gemma-3",
6666
"profiles": [
6767
{
6868
"name": "chat",
69-
"command": "./llama-cli --model bartowski/google_gemma-3-27b-it-qat-GGUF/gemma-3-27b-it-Q4_K_M.gguf --ctx-size 131072 --temp 1.0 --repeat-penalty 1.0 --min-p 0.01 --top-k 64 --top-p 0.95",
69+
"command": "./llama-cli --model bartowski/google_gemma-3-27b-it-qat-GGUF/gemma-3-27b-it-Q8_0.gguf --ctx-size 131072 --temp 1.0 --repeat-penalty 1.0 --min-p 0.01 --top-k 64 --top-p 0.95",
7070
"references": [
7171
"https://ollama.com/library/gemma3/blobs/3116c5225075",
7272
"https://docs.unsloth.ai/basics/gemma-3-how-to-run-and-fine-tune"
@@ -78,11 +78,11 @@
7878
]
7979
},
8080
{
81-
"name": "Phi-4-reasoning",
81+
"name": "microsoft/Phi-4-reasoning",
8282
"profiles": [
8383
{
8484
"name": "chat",
85-
"command": "./llama-cli --model bartowski/microsoft_Phi-4-reasoning-plus-GGUF/microsoft_Phi-4-reasoning-plus-Q4_K_M.gguf --ctx-size 32768 --temp 0.8 --top-k 50 --top-p 0.95 --reasoning-format deepseek",
85+
"command": "./llama-cli --model bartowski/microsoft_Phi-4-reasoning-plus-GGUF/microsoft_Phi-4-reasoning-plus-Q8_0.gguf --ctx-size 32768 --temp 0.8 --top-k 50 --top-p 0.95 --reasoning-format deepseek",
8686
"references": [
8787
"https://huggingface.co/microsoft/Phi-4-reasoning"
8888
],
@@ -93,43 +93,25 @@
9393
]
9494
},
9595
{
96-
"name": "DeepCoder-Preview",
96+
"name": "mistralai/Mistral-Small",
9797
"profiles": [
9898
{
9999
"name": "chat",
100-
"command": "./llama-cli --model bartowski/agentica-org_DeepCoder-14B-Preview-GGUF/agentica-org_DeepCoder-14B-Preview-Q6_K.gguf --ctx-size 131072 --temp 0.6 --top-p 0.95",
100+
"command": "./llama-cli --model bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF --ctx-size 32768 --temp 0.15",
101101
"references": [
102-
"https://huggingface.co/agentica-org/DeepCoder-14B-Preview"
103-
],
104-
"notes": [
105-
"the model performs best with max_tokens set to at least 64000"
106-
]
107-
}
108-
]
109-
},
110-
{
111-
"name": "EXAONE-Deep",
112-
"profiles": [
113-
{
114-
"name": "chat",
115-
"command": "./llama-cli --model bartowski/LGAI-EXAONE_EXAONE-Deep-32B-GGUF/LGAI-EXAONE_EXAONE-Deep-32B-Q4_K_M.gguf --ctx-size 32768 --temp 0.6 --top-p 0.95",
116-
"references": [
117-
"https://huggingface.co/LGAI-EXAONE/EXAONE-Deep-32B"
118-
],
119-
"notes": [
120-
"Ensure the model starts with <thought>\\n for reasoning steps."
102+
"https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503"
121103
]
122104
}
123105
]
124106
},
125107
{
126-
"name": "Mistral-Small-24B-Instruct-2501",
108+
"name": "Qwen/WorldPM",
127109
"profiles": [
128110
{
129111
"name": "chat",
130-
"command": "./llama-cli --model bartowski/Mistral-Small-24B-Instruct-2501-GGUF/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf --ctx-size 32768 --temp 0.15",
112+
"command": "./llama-cli --model bartowski/Qwen_WorldPM-72B-GGUF/Qwen_WorldPM-72B-Q8_0.gguf --ctx-size 131072 --temp 0.7 --min-p 0.01 --top-k 20 --top-p 0.8 --repeat-penalty 1.05",
131113
"references": [
132-
"https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501"
114+
"https://huggingface.co/Qwen/WorldPM-72B"
133115
]
134116
}
135117
]

0 commit comments

Comments
 (0)