Skip to content

Commit 0813466

Browse files
Add 34b quantized model (#920)
1 parent 1a9efa0 commit 0813466

File tree

3 files changed

+40
-8
lines changed

3 files changed

+40
-8
lines changed

docs/source/LLM/支持的模型和数据集.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,12 @@
137137
|yi-34b-chat-int8|[01ai/Yi-34B-Chat-8bits](https://modelscope.cn/models/01ai/Yi-34B-Chat-8bits/summary)|q_proj, k_proj, v_proj|yi|✔|✔|auto_gptq|-|[01-ai/Yi-34B-Chat-8bits](https://huggingface.co/01-ai/Yi-34B-Chat-8bits)|
138138
|yi-1_5-6b|[01ai/Yi-1.5-6B](https://modelscope.cn/models/01ai/Yi-1.5-6B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-1.5-6B](https://huggingface.co/01-ai/Yi-1.5-6B)|
139139
|yi-1_5-6b-chat|[01ai/Yi-1.5-6B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-6B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔||-|[01-ai/Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat)|
140-
|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|-|
141-
|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|-|
142-
|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|-|
143-
|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|-|
140+
|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|[modelscope/Yi-1.5-6B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-AWQ)|
141+
|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|[modelscope/Yi-1.5-6B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-GPTQ)|
142+
|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|[modelscope/Yi-1.5-9B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-AWQ)|
143+
|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|[modelscope/Yi-1.5-9B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-GPTQ)|
144+
|yi-1_5-34b-chat-awq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|[modelscope/Yi-1.5-34B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-AWQ)|
145+
|yi-1_5-34b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|[modelscope/Yi-1.5-34B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-GPTQ)|
144146
|yi-1_5-9b|[01ai/Yi-1.5-9B](https://modelscope.cn/models/01ai/Yi-1.5-9B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-1.5-9B](https://huggingface.co/01-ai/Yi-1.5-9B)|
145147
|yi-1_5-9b-chat|[01ai/Yi-1.5-9B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-9B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔||-|[01-ai/Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)|
146148
|yi-1_5-34b|[01ai/Yi-1.5-34B](https://modelscope.cn/models/01ai/Yi-1.5-34B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-1.5-34B](https://huggingface.co/01-ai/Yi-1.5-34B)|

docs/source_en/LLM/Supported-models-datasets.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,12 @@ The table below introduces all models supported by SWIFT:
137137
|yi-34b-chat-int8|[01ai/Yi-34B-Chat-8bits](https://modelscope.cn/models/01ai/Yi-34B-Chat-8bits/summary)|q_proj, k_proj, v_proj|yi|✔|✔|auto_gptq|-|[01-ai/Yi-34B-Chat-8bits](https://huggingface.co/01-ai/Yi-34B-Chat-8bits)|
138138
|yi-1_5-6b|[01ai/Yi-1.5-6B](https://modelscope.cn/models/01ai/Yi-1.5-6B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-1.5-6B](https://huggingface.co/01-ai/Yi-1.5-6B)|
139139
|yi-1_5-6b-chat|[01ai/Yi-1.5-6B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-6B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔||-|[01-ai/Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat)|
140-
|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|-|
141-
|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|-|
142-
|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|-|
143-
|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|-|
140+
|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|[modelscope/Yi-1.5-6B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-AWQ)|
141+
|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|[modelscope/Yi-1.5-6B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-GPTQ)|
142+
|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|[modelscope/Yi-1.5-9B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-AWQ)|
143+
|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|[modelscope/Yi-1.5-9B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-GPTQ)|
144+
|yi-1_5-34b-chat-awq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|autoawq|-|[modelscope/Yi-1.5-34B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-AWQ)|
145+
|yi-1_5-34b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔|auto_gptq>=0.5|-|[modelscope/Yi-1.5-34B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-GPTQ)|
144146
|yi-1_5-9b|[01ai/Yi-1.5-9B](https://modelscope.cn/models/01ai/Yi-1.5-9B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-1.5-9B](https://huggingface.co/01-ai/Yi-1.5-9B)|
145147
|yi-1_5-9b-chat|[01ai/Yi-1.5-9B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-9B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|✔|✔||-|[01-ai/Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)|
146148
|yi-1_5-34b|[01ai/Yi-1.5-34B](https://modelscope.cn/models/01ai/Yi-1.5-34B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||-|[01-ai/Yi-1.5-34B](https://huggingface.co/01-ai/Yi-1.5-34B)|

swift/llm/utils/model.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ class ModelType:
179179
yi_1_5_6b_chat_gptq_int4 = 'yi-1_5-6b-chat-gptq-int4'
180180
yi_1_5_9b_chat_awq_int4 = 'yi-1_5-9b-chat-awq-int4'
181181
yi_1_5_9b_chat_gptq_int4 = 'yi-1_5-9b-chat-gptq-int4'
182+
yi_1_5_34b_chat_awq_int4 = 'yi-1_5-34b-chat-awq-int4'
183+
yi_1_5_34b_chat_gptq_int4 = 'yi-1_5-34b-chat-gptq-int4'
182184
yi_1_5_9b = 'yi-1_5-9b'
183185
yi_1_5_9b_chat = 'yi-1_5-9b-chat'
184186
yi_1_5_34b = 'yi-1_5-34b'
@@ -1766,6 +1768,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
17661768
torch_dtype=torch.float16,
17671769
function_kwargs={'is_awq': True},
17681770
support_flash_attn=True,
1771+
hf_model_id='modelscope/Yi-1.5-6B-Chat-AWQ',
17691772
support_vllm=True)
17701773
@register_model(
17711774
ModelType.yi_1_5_6b_chat_gptq_int4,
@@ -1776,6 +1779,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
17761779
function_kwargs={'gptq_bits': 4},
17771780
torch_dtype=torch.float16,
17781781
support_flash_attn=True,
1782+
hf_model_id='modelscope/Yi-1.5-6B-Chat-GPTQ',
17791783
support_vllm=True)
17801784
@register_model(
17811785
ModelType.yi_1_5_9b_chat_awq_int4,
@@ -1786,6 +1790,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
17861790
torch_dtype=torch.float16,
17871791
function_kwargs={'is_awq': True},
17881792
support_flash_attn=True,
1793+
hf_model_id='modelscope/Yi-1.5-9B-Chat-AWQ',
17891794
support_vllm=True)
17901795
@register_model(
17911796
ModelType.yi_1_5_9b_chat_gptq_int4,
@@ -1796,6 +1801,29 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
17961801
function_kwargs={'gptq_bits': 4},
17971802
torch_dtype=torch.float16,
17981803
support_flash_attn=True,
1804+
hf_model_id='modelscope/Yi-1.5-9B-Chat-GPTQ',
1805+
support_vllm=True)
1806+
@register_model(
1807+
ModelType.yi_1_5_34b_chat_awq_int4,
1808+
'AI-ModelScope/Yi-1.5-34B-Chat-AWQ',
1809+
LoRATM.llama2,
1810+
TemplateType.yi1_5,
1811+
requires=['autoawq'],
1812+
torch_dtype=torch.float16,
1813+
function_kwargs={'is_awq': True},
1814+
support_flash_attn=True,
1815+
hf_model_id='modelscope/Yi-1.5-34B-Chat-AWQ',
1816+
support_vllm=True)
1817+
@register_model(
1818+
ModelType.yi_1_5_34b_chat_gptq_int4,
1819+
'AI-ModelScope/Yi-1.5-34B-Chat-GPTQ',
1820+
LoRATM.llama2,
1821+
TemplateType.yi1_5,
1822+
requires=['auto_gptq>=0.5'],
1823+
function_kwargs={'gptq_bits': 4},
1824+
torch_dtype=torch.float16,
1825+
support_flash_attn=True,
1826+
hf_model_id='modelscope/Yi-1.5-34B-Chat-GPTQ',
17991827
support_vllm=True)
18001828
@register_model(
18011829
ModelType.yi_1_5_9b,

0 commit comments

Comments
 (0)