Add 34b quantized model (#920)

tastelikefeet · web-flow · commit 0813466f9d6f · 2024-05-13T15:02:32.000+08:00
diff --git a/docs/source/LLM/支持的模型和数据集.md b/docs/source/LLM/支持的模型和数据集.md
@@ -137,10 +137,12 @@
 |yi-34b-chat-int8|[01ai/Yi-34B-Chat-8bits](https://modelscope.cn/models/01ai/Yi-34B-Chat-8bits/summary)|q_proj, k_proj, v_proj|yi|&#x2714;|&#x2714;|auto_gptq|-|[01-ai/Yi-34B-Chat-8bits](https://huggingface.co/01-ai/Yi-34B-Chat-8bits)|
 |yi-1_5-6b|[01ai/Yi-1.5-6B](https://modelscope.cn/models/01ai/Yi-1.5-6B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-6B](https://huggingface.co/01-ai/Yi-1.5-6B)|
 |yi-1_5-6b-chat|[01ai/Yi-1.5-6B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-6B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat)|
-|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|-|
-|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|-|
-|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|-|
-|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|-|
+|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|[modelscope/Yi-1.5-6B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-AWQ)|
+|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|[modelscope/Yi-1.5-6B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-GPTQ)|
+|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|[modelscope/Yi-1.5-9B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-AWQ)|
+|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|[modelscope/Yi-1.5-9B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-GPTQ)|
+|yi-1_5-34b-chat-awq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|[modelscope/Yi-1.5-34B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-AWQ)|
+|yi-1_5-34b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|[modelscope/Yi-1.5-34B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-GPTQ)|
 |yi-1_5-9b|[01ai/Yi-1.5-9B](https://modelscope.cn/models/01ai/Yi-1.5-9B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-9B](https://huggingface.co/01-ai/Yi-1.5-9B)|
 |yi-1_5-9b-chat|[01ai/Yi-1.5-9B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-9B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)|
 |yi-1_5-34b|[01ai/Yi-1.5-34B](https://modelscope.cn/models/01ai/Yi-1.5-34B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-34B](https://huggingface.co/01-ai/Yi-1.5-34B)|
diff --git a/docs/source_en/LLM/Supported-models-datasets.md b/docs/source_en/LLM/Supported-models-datasets.md
@@ -137,10 +137,12 @@ The table below introduces all models supported by SWIFT:
 |yi-34b-chat-int8|[01ai/Yi-34B-Chat-8bits](https://modelscope.cn/models/01ai/Yi-34B-Chat-8bits/summary)|q_proj, k_proj, v_proj|yi|&#x2714;|&#x2714;|auto_gptq|-|[01-ai/Yi-34B-Chat-8bits](https://huggingface.co/01-ai/Yi-34B-Chat-8bits)|
 |yi-1_5-6b|[01ai/Yi-1.5-6B](https://modelscope.cn/models/01ai/Yi-1.5-6B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-6B](https://huggingface.co/01-ai/Yi-1.5-6B)|
 |yi-1_5-6b-chat|[01ai/Yi-1.5-6B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-6B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat)|
-|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|-|
-|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|-|
-|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|-|
-|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|-|
+|yi-1_5-6b-chat-awq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|[modelscope/Yi-1.5-6B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-AWQ)|
+|yi-1_5-6b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-6B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-6B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|[modelscope/Yi-1.5-6B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-6B-Chat-GPTQ)|
+|yi-1_5-9b-chat-awq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|[modelscope/Yi-1.5-9B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-AWQ)|
+|yi-1_5-9b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-9B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-9B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|[modelscope/Yi-1.5-9B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-9B-Chat-GPTQ)|
+|yi-1_5-34b-chat-awq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-AWQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|autoawq|-|[modelscope/Yi-1.5-34B-Chat-AWQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-AWQ)|
+|yi-1_5-34b-chat-gptq-int4|[AI-ModelScope/Yi-1.5-34B-Chat-GPTQ](https://modelscope.cn/models/AI-ModelScope/Yi-1.5-34B-Chat-GPTQ/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;|auto_gptq>=0.5|-|[modelscope/Yi-1.5-34B-Chat-GPTQ](https://huggingface.co/modelscope/Yi-1.5-34B-Chat-GPTQ)|
 |yi-1_5-9b|[01ai/Yi-1.5-9B](https://modelscope.cn/models/01ai/Yi-1.5-9B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-9B](https://huggingface.co/01-ai/Yi-1.5-9B)|
 |yi-1_5-9b-chat|[01ai/Yi-1.5-9B-Chat](https://modelscope.cn/models/01ai/Yi-1.5-9B-Chat/summary)|q_proj, k_proj, v_proj|yi1_5|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)|
 |yi-1_5-34b|[01ai/Yi-1.5-34B](https://modelscope.cn/models/01ai/Yi-1.5-34B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;||-|[01-ai/Yi-1.5-34B](https://huggingface.co/01-ai/Yi-1.5-34B)|
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
@@ -179,6 +179,8 @@ class ModelType:
     yi_1_5_6b_chat_gptq_int4 = 'yi-1_5-6b-chat-gptq-int4'
     yi_1_5_9b_chat_awq_int4 = 'yi-1_5-9b-chat-awq-int4'
     yi_1_5_9b_chat_gptq_int4 = 'yi-1_5-9b-chat-gptq-int4'
+    yi_1_5_34b_chat_awq_int4 = 'yi-1_5-34b-chat-awq-int4'
+    yi_1_5_34b_chat_gptq_int4 = 'yi-1_5-34b-chat-gptq-int4'
     yi_1_5_9b = 'yi-1_5-9b'
     yi_1_5_9b_chat = 'yi-1_5-9b-chat'
     yi_1_5_34b = 'yi-1_5-34b'
@@ -1766,6 +1768,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     torch_dtype=torch.float16,
     function_kwargs={'is_awq': True},
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-6B-Chat-AWQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_6b_chat_gptq_int4,
@@ -1776,6 +1779,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     function_kwargs={'gptq_bits': 4},
     torch_dtype=torch.float16,
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-6B-Chat-GPTQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_9b_chat_awq_int4,
@@ -1786,6 +1790,7 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     torch_dtype=torch.float16,
     function_kwargs={'is_awq': True},
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-9B-Chat-AWQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_9b_chat_gptq_int4,
@@ -1796,6 +1801,29 @@ def cross_entropy_forward(self, inputs: Tensor, target: Tensor) -> Tensor:
     function_kwargs={'gptq_bits': 4},
     torch_dtype=torch.float16,
     support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-9B-Chat-GPTQ',
+    support_vllm=True)
+@register_model(
+    ModelType.yi_1_5_34b_chat_awq_int4,
+    'AI-ModelScope/Yi-1.5-34B-Chat-AWQ',
+    LoRATM.llama2,
+    TemplateType.yi1_5,
+    requires=['autoawq'],
+    torch_dtype=torch.float16,
+    function_kwargs={'is_awq': True},
+    support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-34B-Chat-AWQ',
+    support_vllm=True)
+@register_model(
+    ModelType.yi_1_5_34b_chat_gptq_int4,
+    'AI-ModelScope/Yi-1.5-34B-Chat-GPTQ',
+    LoRATM.llama2,
+    TemplateType.yi1_5,
+    requires=['auto_gptq>=0.5'],
+    function_kwargs={'gptq_bits': 4},
+    torch_dtype=torch.float16,
+    support_flash_attn=True,
+    hf_model_id='modelscope/Yi-1.5-34B-Chat-GPTQ',
     support_vllm=True)
 @register_model(
     ModelType.yi_1_5_9b,