@@ -56,60 +56,82 @@ The llama.cpp CANN backend is designed to support Ascend NPU. It utilizes the abi

## Model Supports

- | Model Name | FP16 | Q8_0 | Q4_0 |
+ | Model Name | FP16 | Q4_0 | Q8_0 |
| :----------------------------| :-----:| :----:| :----:|
- | AquilaChat2-7B | √ | √ | √ |
- | Baichuan-7b | √ | √ | √ |
- | Baichuan2-7B-Chat | √ | √ | √ |
- | bitnet_b1_58-large | √ | √ | √ |
- | bloom-560m | √ | x | √ |
- | bloomz-alpaca-560m | √ | x | √ |
- | c4ai-command-r-35B-v01 | x | x | x |
- | chatglm3-6B | x | x | x |
- | chinese-alpaca-2-1.3b | √ | √ | √ |
- | CodeShell-7B | √ | √ | √ |
- | deepseek-ai_deepseek-coder-1.3B-base | x | x | x |
- | deepseek-ai_DeepSeek-V2-Lite | x | x | x |
- | deepseek-coder-6.7B-instruct | x | x | x |
- | DeepSeek-V2-Lite-64x1.5B | x | x | x |
- | falcon-7b-instruct | √ | √ | √ |
- | flan-t5-large | √ | √ | √ |
- | gemma-2-9b-it | √ | √ | √ |
- | glm-4-9B | x | x | x |
- | gpt2 | √ | √ | √ |
- | Gpt2-163M | √ | √ | √ |
- | granite-3B-code-instruct | √ | √ | √ |
+ | Llama-2 | √ | √ | √ |
+ | Llama-3 | √ | √ | √ |
+ | Mistral-7B | √ | √ | √ |
+ | Mistral MOE | √ | √ | √ |
+ | DBRX | - | - | - |
+ | Falcon | √ | √ | √ |
+ | Chinese LLaMA/Alpaca | √ | √ | √ |
+ | Vigogne(French) | √ | √ | √ |
+ | BERT | x | x | x |
+ | Koala | √ | √ | √ |
+ | Baichuan | √ | √ | √ |
+ | Aquila 1 & 2 | √ | √ | √ |
+ | Starcoder models | √ | √ | √ |
+ | Refact | √ | √ | √ |
+ | MPT | √ | √ | √ |
+ | Bloom | √ | √ | √ |
+ | Yi models | √ | √ | √ |
+ | stablelm models | √ | √ | √ |
+ | DeepSeek models | x | x | x |
+ | Qwen models | √ | √ | √ |
+ | PLaMo-13B | √ | √ | √ |
+ | Phi models | √ | √ | √ |
+ | PhiMoE | √ | √ | √ |
+ | GPT-2 | √ | √ | √ |
+ | Orion | √ | √ | √ |
+ | InternLM2 | √ | √ | √ |
+ | CodeShell | √ | √ | √ |
+ | Gemma | √ | √ | √ |
+ | Mamba | √ | √ | √ |
+ | Xverse | √ | √ | √ |
+ | command-r models | √ | √ | √ |
+ | Grok-1 | - | - | - |
+ | SEA-LION | √ | √ | √ |
| GritLM-7B | √ | √ | √ |
- | internlm2_5-7b-chat | √ | √ | √ |
- | koala-7B-HF | √ | √ | √ |
- | Llama-2-7b-chat-hf | √ | √ | √ |
- | Llama-3-Smaug-8B | √ | √ | √ |
- | Llama2-Chinese-7b-Chat | √ | √ | √ |
- | Llama3-8B | √ | √ | √ |
- | Llama3-8b-chinese | √ | √ | √ |
- | mamba-130m-hf | √ | √ | √ |
- | Mistral-7B-Instruct-v0.2 | √ | √ | √ |
- | Mixtral-8x7B-Instruct-v0.1 | x | √ | √ |
- | mpt-7B | √ | √ | √ |
- | OLMo-1B-hf | √ | √ | √ |
- | OpenELM-3B-Instruct | √ | √ | √ |
- | Orion-14b-base | √ | √ | √ |
- | phi1 | x | x | x |
- | phi2 | x | x | x |
- | Phi-3-mini-4k-instruct | √ | √ | √ |
- | plamo-13b | √ | √ | √ |
- | pythia-70M | x | x | x |
- | Qwen-7B | √ | √ | √ |
- | Qwen2-1.5B-Instruct | √ | x | √ |
- | Refact-1_6B-fim | √ | √ | √ |
- | SmolLM-135M | √ | √ | √ |
- | stablelm-zephyr | x | x | x |
- | stablelm-2-zephyr-1_6b | x | x | x |
- | starcoderbase-1b | √ | √ | √ |
- | starcoder2-3b | √ | √ | √ |
- | vigogne-7b-chat | √ | √ | √ |
- | xverse-7b-chat | √ | √ | √ |
- | Yi-6b-Chat | √ | √ | √ |
+ | OLMo | √ | √ | √ |
+ | OLMo 2 | √ | √ | √ |
+ | OLMoE | √ | √ | √ |
+ | Granite models | √ | √ | √ |
+ | GPT-NeoX | √ | √ | √ |
+ | Pythia | √ | √ | √ |
+ | Snowflake-Arctic MoE | - | - | - |
+ | Smaug | √ | √ | √ |
+ | Poro 34B | √ | √ | √ |
+ | Bitnet b1.58 models | √ | x | x |
+ | Flan-T5 | √ | √ | √ |
+ | Open Elm models | x | √ | √ |
+ | chatGLM3-6B + ChatGLM4-9b + GLMEdge-1.5b + GLMEdge-4b | √ | √ | √ |
+ | GLM-4-0414 | √ | √ | √ |
+ | SmolLM | √ | √ | √ |
+ | EXAONE-3.0-7.8B-Instruct | √ | √ | √ |
+ | FalconMamba Models | √ | √ | √ |
+ | Jais Models | - | x | x |
+ | Bielik-11B-v2.3 | √ | √ | √ |
+ | RWKV-6 | - | √ | √ |
+ | QRWKV-6 | √ | √ | √ |
+ | GigaChat-20B-A3B | x | x | x |
+ | Trillion-7B-preview | √ | √ | √ |
+ | Ling models | √ | √ | √ |
+
+
+ **Multimodal**
+ | Model Name | FP16 | Q4_0 | Q8_0 |
+ | :----------------------------| :-----:| :----:| :----:|
+ | LLaVA 1.5 models, LLaVA 1.6 models | x | x | x |
+ | BakLLaVA | √ | √ | √ |
+ | Obsidian | √ | - | - |
+ | ShareGPT4V | x | - | - |
+ | MobileVLM 1.7B/3B models | - | - | - |
+ | Yi-VL | - | - | - |
+ | Mini CPM | √ | √ | √ |
+ | Moondream | √ | √ | √ |
+ | Bunny | √ | - | - |
+ | GLM-EDGE | √ | √ | √ |
+ | Qwen2-VL | √ | √ | √ |
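
For context, the Q4_0 and Q8_0 columns in these tables refer to GGUF files produced with llama.cpp's own quantization tool, and FP16 refers to running the unquantized `f16` GGUF directly. Below is a minimal sketch of that flow; the `build/bin` output directory, the checkpoint path, and the `-ngl 32` offload count are assumptions to adapt to your own build and model.

```sh
# Convert a Hugging Face checkpoint to an FP16 GGUF file (paths are placeholders).
python convert_hf_to_gguf.py /path/to/hf-model --outtype f16 --outfile model-f16.gguf

# Quantize to one of the types listed in the table, e.g. Q4_0 or Q8_0.
./build/bin/llama-quantize model-f16.gguf model-q4_0.gguf Q4_0

# Run inference, offloading layers to the Ascend NPU through the CANN backend.
./build/bin/llama-cli -m model-q4_0.gguf -p "Building a website can be done in 10 steps:" -ngl 32
```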