|
5 | 5 | "profiles": [ |
6 | 6 | { |
7 | 7 | "name": "thinking", |
8 | | - "command": "./llama-cli -hf Qwen/Qwen3-8B-GGUF:Q8_0 --jinja -fa --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 -c 40960 -n 32768 --no-context-shift", |
| 8 | + "command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q3_K_M.gguf --jinja --flash-attn --temp 0.6 --top-k 20 --top-p 0.95 --min-p 0 --ctx-size 40960 --predict 32768 --no-context-shift", |
9 | 9 | "references": [ |
10 | 10 | "https://qwen.readthedocs.io/en/latest/run_locally/llama.cpp.html#llama-cli", |
11 | 11 | "https://huggingface.co/Qwen/Qwen3-235B-A22B#switching-between-thinking-and-non-thinking-mode" |
12 | 12 | ] |
13 | 13 | }, |
14 | 14 | { |
15 | 15 | "name": "non thinking", |
16 | | - "command": "./llama-cli -hf Qwen/Qwen3-8B-GGUF:Q8_0 --jinja -fa --temp 0.7 --top-k 20 --top-p 0.8 --min-p 0 -c 40960 -n 32768 --no-context-shift", |
| 16 | + "command": "./llama-cli --model bartowski/Qwen_Qwen3-8B-GGUF/Qwen_Qwen3-8B-Q3_K_M.gguf --jinja --flash-attn --temp 0.7 --top-k 20 --top-p 0.8 --min-p 0 --ctx-size 40960 --predict 32768 --no-context-shift", |
17 | 17 | "references": [ |
18 | 18 | "https://qwen.readthedocs.io/en/latest/run_locally/llama.cpp.html#llama-cli", |
19 | 19 | "https://huggingface.co/Qwen/Qwen3-235B-A22B#switching-between-thinking-and-non-thinking-mode" |
|
26 | 26 | "profiles": [ |
27 | 27 | { |
28 | 28 | "name": "chat", |
29 | | - "command": "./llama-cli --model unsloth-QwQ-32B-GGUF/QwQ-32B-Q4_K_M.gguf --ctx-size 16384 --temp 0.6 --repeat-penalty 1.1 --dry-multiplier 0.5 --min-p 0.01 --top-k 40 --top-p 0.95 --samplers \"top_k;top_p;min_p;temperature;dry;typ_p;xtc\"", |
| 29 | + "command": "./llama-cli --model unsloth/QwQ-32B-GGUF/QwQ-32B-Q4_K_M.gguf --ctx-size 32768 --temp 0.6 --repeat-penalty 1.1 --dry-multiplier 0.5 --min-p 0.01 --top-k 40 --top-p 0.95 --samplers \"top_k;top_p;min_p;temperature;dry;typ_p;xtc\"", |
30 | 30 | "references": [ |
31 | 31 | "https://modelscope.cn/models/Qwen/QwQ-32B", |
32 | 32 | "https://huggingface.co/Qwen/QwQ-32B", |
|
40 | 40 | "profiles": [ |
41 | 41 | { |
42 | 42 | "name": "chat", |
43 | | - "command": "./llama-cli --model unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF/Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf --ctx-size 16384 -ot \".ffn_.*_exps.=CPU\" --temp 0.6 --min-p 0.01 --top-p 0.9", |
| 43 | + "command": "./llama-cli --model unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF/Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf --ctx-size 10485760 --temp 0.6 --min-p 0.01 --top-p 0.9", |
44 | 44 | "references": [ |
45 | 45 | "https://www.llama.com/docs/llama-everywhere/running-meta-llama-on-linux/", |
46 | 46 | "https://docs.unsloth.ai/basics/llama-4-how-to-run-and-fine-tune" |
|
53 | 53 | "profiles": [ |
54 | 54 | { |
55 | 55 | "name": "chat", |
56 | | - "command": "./llama-cli --model bartowski/google_gemma-3-27b-it-qat-GGUF/gemma-3-27b-it-Q4_K_M.gguf --ctx-size 16384 --temp 1.0 --repeat-penalty 1.0 --min-p 0.01 --top-k 64 --top-p 0.95", |
| 56 | + "command": "./llama-cli --model bartowski/google_gemma-3-27b-it-qat-GGUF/google_gemma-3-27b-it-qat-Q4_K_M.gguf --ctx-size 131072 --temp 1.0 --repeat-penalty 1.0 --min-p 0.01 --top-k 64 --top-p 0.95", |
57 | 57 | "references": [ |
58 | 58 | "https://ollama.com/library/gemma3/blobs/3116c5225075", |
59 | 59 | "https://docs.unsloth.ai/basics/gemma-3-how-to-run-and-fine-tune" |
|
78 | 78 | "profiles": [ |
79 | 79 | { |
80 | 80 | "name": "chat", |
81 | | - "command": "./llama-cli --model bartowski/agentica-org_DeepCoder-14B-Preview-GGUF/agentica-org_DeepCoder-14B-Preview-Q6_K.gguf --ctx-size 64000 --temp 0.6 --top-p 0.95", |
| 81 | + "command": "./llama-cli --model bartowski/agentica-org_DeepCoder-14B-Preview-GGUF/agentica-org_DeepCoder-14B-Preview-Q6_K.gguf --ctx-size 131072 --temp 0.6 --top-p 0.95", |
82 | 82 | "references": [ |
83 | 83 | "https://huggingface.co/agentica-org/DeepCoder-14B-Preview" |
84 | 84 | ], |
|
0 commit comments