@@ -58,193 +58,77 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilizes the abi
5858
5959| Model Name | FP16 | Q8_0 | Q4_0 |
6060| :----------------------------| :-----:| :----:| :----:|
61- | AquilaChat2-7B | √ | √ | √ |
62- | Baichuan-7b | √ | √ | √ |
63- | Baichuan2-7B-Chat | √ | √ | √ |
64- | bitnet_b1_58-large | √ | √ | √ |
65- | bloom-560m | √ | x | √ |
66- | bloomz-alpaca-560m | √ | x | √ |
67- | c4ai-command-r-35B-v01 | x | x | x |
68- | chatglm3-6B | x | x | x |
69- | chinese-alpaca-2-1.3b | √ | √ | √ |
70- | CodeShell-7B | √ | √ | √ |
71- | deepseek-ai_deepseek-coder-1.3B-base | x | x | x |
72- | deepseek-ai_DeepSeek-V2-Lite | x | x | x |
73- | deepseek-coder-6.7B-instruct | x | x | x |
74- | DeepSeek-V2-Lite-64x1.5B | x | x | x |
75- | falcon-7b-instruct | √ | √ | √ |
76- | flan-t5-large | √ | √ | √ |
77- | gemma-2-9b-it | √ | √ | √ |
78- | glm-4-9B | x | x | x |
79- | gpt2 | √ | √ | √ |
80- | Gpt2-163M | √ | √ | √ |
81- | granite-3B-code-instruct | √ | √ | √ |
61+ | Llama-2 | √ | √ | √ |
62+ | Llama-3 | √ | √ | √ |
63+ | Mistral-7B | √ | √ | √ |
64+ | Mistral MOE | x | x | x |
65+ | DBRX | x | x | x |
66+ | Falcon | √ | √ | √ |
67+ | Chinese LLaMA/Alpaca | √ | √ | √ |
68+ | Vigogne(French) | √ | √ | √ |
69+ | BERT | √ | √ | √ |
70+ | Koala | √ | √ | √ |
71+ | Baichuan | √ | √ | √ |
72+ | Aquila 1 & 2 | √ | √ | √ |
73+ | Starcoder models | √ | √ | √ |
74+ | Refact | √ | √ | √ |
75+ | MPT | √ | √ | √ |
76+ | Bloom | √ | √ | √ |
77+ | Yi models | √ | √ | √ |
78+ | stablelm models | √ | √ | √ |
79+ | DeepSeek models | x | x | x |
80+ | Qwen models | √ | √ | √ |
81+ | PLaMo-13B | √ | √ | √ |
82+ | Phi models | √ | √ | √ |
83+ | PhiMoE | x | x | x |
84+ | GPT-2 | √ | √ | √ |
85+ | Orion | √ | √ | √ |
86+ | InternLM2 | √ | √ | √ |
87+ | CodeShell | √ | √ | √ |
88+ | Gemma | √ | √ | √ |
89+ | Mamba | √ | √ | √ |
90+ | Xverse | √ | √ | √ |
91+ | command-r models | √ | √ | √ |
92+ | Grok-1 | x | x | x |
93+ | SEA-LION | √ | √ | √ |
8294| GritLM-7B | √ | √ | √ |
83- | internlm2_5-7b-chat | √ | √ | √ |
84- | koala-7B-HF | √ | √ | √ |
85- | Llama-2-7b-chat-hf | √ | √ | √ |
86- | Llama-3-Smaug-8B | √ | √ | √ |
87- | Llama2-Chinese-7b-Chat | √ | √ | √ |
88- | Llama3-8B | √ | √ | √ |
89- | Llama3-8b-chinese | √ | √ | √ |
90- | mamba-130m-hf | √ | √ | √ |
91- | Mistral-7B-Instruct-v0.2 | √ | √ | √ |
92- | Mixtral-8x7B-Instruct-v0.1 | x | √ | √ |
93- | mpt-7B | √ | √ | √ |
94- | OLMo-1B-hf | √ | √ | √ |
95- | OpenELM-3B-Instruct | √ | √ | √ |
96- | Orion-14b-base | √ | √ | √ |
97- | phi1 | x | x | x |
98- | phi2 | x | x | x |
99- | Phi-3-mini-4k-instruct | √ | √ | √ |
100- | plamo-13b | √ | √ | √ |
101- | pythia-70M | x | x | x |
102- | Qwen-7B | √ | √ | √ |
103- | Qwen2-1.5B-Instruct | √ | x | √ |
104- | Refact-1_6B-fim | √ | √ | √ |
105- | SmolLM-135M | √ | √ | √ |
106- | stablelm-zephyr | x | x | x |
107- | stablelm-2-zephyr-1_6b | x | x | x |
108- | starcoderbase-1b | √ | √ | √ |
109- | starcoder2-3b | √ | √ | √ |
110- | vigogne-7b-chat | √ | √ | √ |
111- | xverse-7b-chat | √ | √ | √ |
112- | Yi-6b-Chat | √ | √ | √ |
113- | snowflake-arctic-embed | √ | × | × |
114- | all-minilm | √ | × | × |
115- | granite-embedding | √ | × | × |
116- | smollm | √ | √ | √ |
117- | smollm2 | √ | √ | √ |
118- | nomic-embed-text | √ | × | × |
119- | qwen2 | √ | √ | √ |
120- | reader-lm | √ | √ | √ |
121- | qwen2.5 | √ | √ | √ |
122- | qwen2.5-coder | √ | √ | √ |
123- | qwen | √ | √ | √ |
124- | paraphrase-multilingual | √ | × | × |
125- | tinydolphin | √ | √ | √ |
126- | tinyllama | √ | √ | √ |
127- | mxbai-embed-large | √ | × | × |
128- | bge-large | √ | × | × |
129- | starcoder | √ | √ | √ |
130- | granite3-moe | √ | √ | √ |
131- | llama3 | √ | √ | √ |
132- | deepseek-coder | √ | √ | √ |
133- | granite3 | √ | √ | √ |
134- | moondream | √ | √ | √ |
135- | yi-coder | √ | √ | √ |
136- | llama-guard3 | √ | √ | √ |
137- | qwen2-math | √ | √ | √ |
138- | stablelm2 | × | √ | √ |
139- | sailor2 | √ | √ | × |
140- | gemma3 | √ | √ | × |
141- | internlm2 | √ | √ | √ |
142- | bge-m3 | √ | × | × |
143- | granite3-dense | √ | √ | √ |
144- | codegemma | √ | √ | √ |
145- | phi | √ | √ | √ |
146- | dolphin-phi | × | √ | √ |
147- | stable-code | √ | √ | √ |
148- | stablelm-zephyr | √ | √ | √ |
149- | gemma2 | √ | √ | √ |
150- | shieldgemma | × | √ | √ |
151- | gemma | √ | √ | √ |
152- | starcoder2 | √ | √ | √ |
153- | falcon3 | √ | √ | × |
154- | deepseek-r1 | √ | √ | × |
155- | deepscaler | √ | √ | × |
156- | hermes3 | √ | √ | √ |
157- | orca-mini | √ | √ | √ |
158- | granite-code | √ | √ | √ |
159- | opencoder | √ | √ | × |
160- | nuextract | √ | √ | √ |
161- | phi3 | √ | √ | √ |
162- | phi3.5 | √ | √ | √ |
163- | nemotron-mini | √ | √ | √ |
164- | granite3-guardian | √ | √ | × |
165- | exaone3.5 | √ | √ | × |
166- | exaone-deep | √ | √ | × |
167- | yi | √ | √ | √ |
168- | smallthinker | √ | √ | × |
169- | yarn-llama2 | √ | √ | √ |
170- | xwinlm | √ | √ | √ |
171- | wizard-vicuna-uncensored | √ | √ | √ |
172- | vicuna | √ | √ | √ |
173- | stable-beluga | √ | √ | √ |
174- | nous-hermes | √ | √ | √ |
175- | medllama2 | √ | √ | √ |
176- | llama2-uncensored | √ | √ | √ |
177- | meditron | √ | √ | √ |
178- | llava | √ | √ | √ |
179- | magicoder | √ | √ | √ |
180- | wizardlm | √ | √ | √ |
181- | wizard-math | √ | √ | √ |
182- | wizardcoder | √ | √ | √ |
183- | orca2 | √ | √ | √ |
184- | codellama | √ | √ | √ |
185- | duckdb-nsql | √ | √ | √ |
186- | llama2 | √ | √ | √ |
187- | deepseek-llm | √ | √ | √ |
188- | phi4-mini | √ | √ | × |
189- | samantha-mistral | × | √ | √ |
190- | yarn-mistral | √ | √ | √ |
191- | sqlcoder | √ | √ | √ |
192- | neural-chat | √ | √ | √ |
193- | bakllava | √ | √ | √ |
194- | wizardlm2 | √ | √ | √ |
195- | dolphin-mistral | √ | √ | √ |
196- | mistral-openorca | √ | √ | √ |
197- | openhermes | √ | √ | √ |
198- | mistrallite | √ | √ | √ |
199- | notus | √ | √ | √ |
200- | zephyr | √ | √ | √ |
201- | mistral | √ | √ | √ |
202- | openchat | √ | √ | √ |
203- | mathstral | √ | √ | √ |
204- | codeqwen | √ | √ | √ |
205- | falcon | √ | √ | √ |
206- | dolphincoder | √ | √ | √ |
207- | minicpm-v | √ | √ | √ |
208- | bespoke-minicheck | √ | √ | √ |
209- | llama3-chatqa | √ | √ | √ |
210- | llama3-gradient | √ | √ | √ |
211- | dolphin-llama3 | √ | √ | √ |
212- | llama3-groq-tool-use | × | √ | √ |
213- | llama-pro | √ | √ | √ |
214- | aya | × | √ | √ |
215- | aya-expanse | √ | √ | √ |
216- | codegeex4 | × | √ | √ |
217- | glm4 | √ | √ | √ |
218- | solar | √ | √ | √ |
219- | nous-hermes2 | √ | √ | √ |
220- | falcon2 | √ | √ | √ |
221- | mistral-nemo | √ | √ | √ |
222- | llama2-chinese | √ | √ | × |
223- | wizard-vicuna | √ | √ | √ |
224- | codeup | √ | √ | √ |
225- | open-orca-platypus2 | √ | √ | √ |
226- | nexusraven | √ | √ | √ |
227- | everythinglm | √ | √ | √ |
228- | llava-phi3 | √ | × | × |
229- | starling-lm | √ | √ | × |
230- | olmo2 | √ | √ | × |
231- | marco-o1 | × | √ | × |
232- | openthinker | √ | √ | × |
233- | dolphin3 | √ | √ | × |
234- | tulu3 | √ | √ | × |
235- | command-r7b | √ | √ | × |
236- | command-r7b-arabic | √ | √ | × |
237- | deepseek-v2 | × | √ | √ |
238- | deepseek-coder-v2 | × | √ | √ |
239- | codestral | × | √ | √ |
240- | mistral-small | × | √ | √ |
241- | wizardlm-uncensored | √ | √ | × |
242- | phi4 | × | √ | × |
243- | llava-llama3 | √ | × | × |
244- | command-r | × | × | √ |
245- | phind-codellama | × | × | √ |
246- | codebooga | × | × | √ |
247- | alfred | × | × | √ |
95+ | OLMo | √ | √ | √ |
96+ | OLMo 2 | √ | √ | √ |
97+ | OLMoE | x | x | x |
98+ | Granite models | √ | √ | √ |
99+ | GPT-NeoX + Pythia | x | x | x |
100+ | Snowflake-Arctic MoE | x | x | x |
101+ | Smaug | √ | √ | √ |
102+ | Poro 34B | √ | √ | √ |
103+ | Bitnet b1.58 models | √ | √ | √ |
104+ | Flan-T5 | √ | √ | √ |
105+ | Open Elm models | √ | √ | √ |
106+ | chatGLM3-6B + ChatGLM4-9b + GLMEdge-1.5b + GLMEdge-4b | √ | √ | √ |
107+ | GLM-4-0414 | √ | √ | √ |
108+ | SmolLM | √ | √ | √ |
109+ | EXAONE-3.0-7.8B-Instruct | √ | √ | √ |
110+ | FalconMamba Models | √ | √ | √ |
111+ | Jais Models | x | x | x |
112+ | Bielik-11B-v2.3 | √ | √ | √ |
113+ | RWKV-6 | x | x | x |
114+ | QRWKV-6 | x | x | x |
115+ | GigaChat-20B-A3B | x | x | x |
116+ | Trillion-7B-preview | √ | √ | √ |
117+ | Ling models | √ | √ | √ |
118+
119+
120+ **Multimodal**
121+ | LLaVA 1.5 models, LLaVA 1.6 models | √ | √ | √ |
122+ | BakLLaVA | x | x | x |
123+ | Obsidian | x | x | x |
124+ | ShareGPT4V | x | x | x |
125+ | MobileVLM 1.7B/3B models | x | x | x |
126+ | Yi-VL | x | x | x |
127+ | Mini CPM | √ | √ | √ |
128+ | Moondream | √ | √ | √ |
129+ | Bunny | x | x | x |
130+ | GLM-EDGE | x | x | x |
131+ | Qwen2-VL | √ | √ | √ |
248132
249133
250134
0 commit comments