|
1 | 1 | { |
2 | 2 | "models": [ |
3 | 3 | { |
4 | | - "name": "Qwen3", |
| 4 | + "name": "Qwen/Qwen3", |
5 | 5 | "profiles": [ |
6 | 6 | { |
7 | 7 | "name": "thinking", |
8 | | - "command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q3_K_M.gguf --jinja --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --ctx-size 40960 --predict 32768", |
| 8 | + "command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q8_0.gguf --jinja --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --ctx-size 40960 --predict 32768", |
9 | 9 | "references": [ |
10 | 10 | "https://qwen.readthedocs.io/en/latest/run_locally/llama.cpp.html#llama-cli", |
11 | 11 | "https://huggingface.co/Qwen/Qwen3-235B-A22B#switching-between-thinking-and-non-thinking-mode" |
|
16 | 16 | }, |
17 | 17 | { |
18 | 18 | "name": "non thinking", |
19 | | - "command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q3_K_M.gguf --jinja --temp 0.7 --top-k 20 --top-p 0.8 --min-p 0 --ctx-size 40960 --predict 32768", |
| 19 | + "command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q8_0.gguf --jinja --temp 0.7 --top-k 20 --top-p 0.8 --min-p 0 --ctx-size 40960 --predict 32768", |
20 | 20 | "references": [ |
21 | 21 | "https://qwen.readthedocs.io/en/latest/run_locally/llama.cpp.html#llama-cli", |
22 | 22 | "https://huggingface.co/Qwen/Qwen3-235B-A22B#switching-between-thinking-and-non-thinking-mode" |
|
28 | 28 | ] |
29 | 29 | }, |
30 | 30 | { |
31 | | - "name": "QwQ", |
| 31 | + "name": "Qwen/QwQ", |
32 | 32 | "profiles": [ |
33 | 33 | { |
34 | 34 | "name": "chat", |
35 | | - "command": "./llama-cli --model unsloth-QwQ-32B-GGUF/QwQ-32B-Q4_K_M.gguf --ctx-size 32768 --temp 0.6 --min-p 0.01 --top-k 40 --top-p 0.95", |
| 35 | + "command": "./llama-cli --model unsloth-QwQ-32B-GGUF/QwQ-32B-Q8_0.gguf --ctx-size 32768 --temp 0.6 --min-p 0.01 --top-k 40 --top-p 0.95", |
36 | 36 | "references": [ |
37 | 37 | "https://modelscope.cn/models/Qwen/QwQ-32B", |
38 | 38 | "https://huggingface.co/Qwen/QwQ-32B", |
|
46 | 46 | ] |
47 | 47 | }, |
48 | 48 | { |
49 | | - "name": "Llama-4", |
| 49 | + "name": "meta-llama/Llama-4", |
50 | 50 | "profiles": [ |
51 | 51 | { |
52 | 52 | "name": "chat", |
|
62 | 62 | ] |
63 | 63 | }, |
64 | 64 | { |
65 | | - "name": "Gemma3", |
| 65 | + "name": "google/gemma-3", |
66 | 66 | "profiles": [ |
67 | 67 | { |
68 | 68 | "name": "chat", |
69 | | - "command": "./llama-cli --model bartowski/google_gemma-3-27b-it-qat-GGUF/gemma-3-27b-it-Q4_K_M.gguf --ctx-size 131072 --temp 1.0 --repeat-penalty 1.0 --min-p 0.01 --top-k 64 --top-p 0.95", |
| 69 | + "command": "./llama-cli --model bartowski/google_gemma-3-27b-it-qat-GGUF/gemma-3-27b-it-Q8_0.gguf --ctx-size 131072 --temp 1.0 --repeat-penalty 1.0 --min-p 0.01 --top-k 64 --top-p 0.95", |
70 | 70 | "references": [ |
71 | 71 | "https://ollama.com/library/gemma3/blobs/3116c5225075", |
72 | 72 | "https://docs.unsloth.ai/basics/gemma-3-how-to-run-and-fine-tune" |
|
78 | 78 | ] |
79 | 79 | }, |
80 | 80 | { |
81 | | - "name": "Phi-4-reasoning", |
| 81 | + "name": "microsoft/Phi-4-reasoning", |
82 | 82 | "profiles": [ |
83 | 83 | { |
84 | 84 | "name": "chat", |
85 | | - "command": "./llama-cli --model bartowski/microsoft_Phi-4-reasoning-plus-GGUF/microsoft_Phi-4-reasoning-plus-Q4_K_M.gguf --ctx-size 32768 --temp 0.8 --top-k 50 --top-p 0.95 --reasoning-format deepseek", |
| 85 | + "command": "./llama-cli --model bartowski/microsoft_Phi-4-reasoning-plus-GGUF/microsoft_Phi-4-reasoning-plus-Q8_0.gguf --ctx-size 32768 --temp 0.8 --top-k 50 --top-p 0.95 --reasoning-format deepseek", |
86 | 86 | "references": [ |
87 | 87 | "https://huggingface.co/microsoft/Phi-4-reasoning" |
88 | 88 | ], |
|
93 | 93 | ] |
94 | 94 | }, |
95 | 95 | { |
96 | | - "name": "DeepCoder-Preview", |
| 96 | + "name": "mistralai/Mistral-Small", |
97 | 97 | "profiles": [ |
98 | 98 | { |
99 | 99 | "name": "chat", |
100 | | - "command": "./llama-cli --model bartowski/agentica-org_DeepCoder-14B-Preview-GGUF/agentica-org_DeepCoder-14B-Preview-Q6_K.gguf --ctx-size 131072 --temp 0.6 --top-p 0.95", |
| 100 | + "command": "./llama-cli --model bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-Q8_0.gguf --ctx-size 32768 --temp 0.15", |
101 | 101 | "references": [ |
102 | | - "https://huggingface.co/agentica-org/DeepCoder-14B-Preview" |
103 | | - ], |
104 | | - "notes": [ |
105 | | - "the model performs best with max_tokens set to at least 64000" |
106 | | - ] |
107 | | - } |
108 | | - ] |
109 | | - }, |
110 | | - { |
111 | | - "name": "EXAONE-Deep", |
112 | | - "profiles": [ |
113 | | - { |
114 | | - "name": "chat", |
115 | | - "command": "./llama-cli --model bartowski/LGAI-EXAONE_EXAONE-Deep-32B-GGUF/LGAI-EXAONE_EXAONE-Deep-32B-Q4_K_M.gguf --ctx-size 32768 --temp 0.6 --top-p 0.95", |
116 | | - "references": [ |
117 | | - "https://huggingface.co/LGAI-EXAONE/EXAONE-Deep-32B" |
118 | | - ], |
119 | | - "notes": [ |
120 | | - "Ensure the model starts with <thought>\\n for reasoning steps." |
| 102 | + "https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503" |
121 | 103 | ] |
122 | 104 | } |
123 | 105 | ] |
124 | 106 | }, |
125 | 107 | { |
126 | | - "name": "Mistral-Small-24B-Instruct-2501", |
| 108 | + "name": "Qwen/WorldPM", |
127 | 109 | "profiles": [ |
128 | 110 | { |
129 | 111 | "name": "chat", |
130 | | - "command": "./llama-cli --model bartowski/Mistral-Small-24B-Instruct-2501-GGUF/Mistral-Small-24B-Instruct-2501-Q4_K_M.gguf --ctx-size 32768 --temp 0.15", |
| 112 | + "command": "./llama-cli --model bartowski/Qwen_WorldPM-72B-GGUF/Qwen_WorldPM-72B-Q8_0.gguf --ctx-size 131072 --temp 0.7 --min-p 0.01 --top-k 20 --top-p 0.8 --repeat-penalty 1.05", |
131 | 113 | "references": [ |
132 | | - "https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501" |
| 114 | + "https://huggingface.co/Qwen/WorldPM-72B" |
133 | 115 | ] |
134 | 116 | } |
135 | 117 | ] |
|
0 commit comments