support more models (#1552)

Jintao-Huang · web-flow · commit 3fad43135928 · 2024-07-31T15:23:46.000+08:00
diff --git a/docs/source/LLM/支持的模型和数据集.md b/docs/source/LLM/支持的模型和数据集.md
@@ -133,6 +133,9 @@
 |llama3-70b-instruct-awq|[swift/Meta-Llama-3-70B-Instruct-AWQ](https://modelscope.cn/models/swift/Meta-Llama-3-70B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|autoawq|-|[study-hjt/Meta-Llama-3-70B-Instruct-AWQ](https://huggingface.co/study-hjt/Meta-Llama-3-70B-Instruct-AWQ)|
 |llama3_1-8b|[LLM-Research/Meta-Llama-3.1-8B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)|
 |llama3_1-8b-instruct|[LLM-Research/Meta-Llama-3.1-8B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)|
+|llama3_1-8b-instruct-awq|[LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4)|
+|llama3_1-8b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4)|
+|llama3_1-8b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4)|
 |llama3_1-70b|[LLM-Research/Meta-Llama-3.1-70B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B)|
 |llama3_1-70b-instruct|[LLM-Research/Meta-Llama-3.1-70B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct)|
 |llama3_1-70b-instruct-fp8|[LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct-FP8](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-FP8)|
diff --git a/docs/source_en/LLM/Supported-models-datasets.md b/docs/source_en/LLM/Supported-models-datasets.md
@@ -133,6 +133,9 @@ The table below introduces all models supported by SWIFT:
 |llama3-70b-instruct-awq|[swift/Meta-Llama-3-70B-Instruct-AWQ](https://modelscope.cn/models/swift/Meta-Llama-3-70B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|autoawq|-|[study-hjt/Meta-Llama-3-70B-Instruct-AWQ](https://huggingface.co/study-hjt/Meta-Llama-3-70B-Instruct-AWQ)|
 |llama3_1-8b|[LLM-Research/Meta-Llama-3.1-8B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)|
 |llama3_1-8b-instruct|[LLM-Research/Meta-Llama-3.1-8B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)|
+|llama3_1-8b-instruct-awq|[LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4)|
+|llama3_1-8b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4)|
+|llama3_1-8b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4)|
 |llama3_1-70b|[LLM-Research/Meta-Llama-3.1-70B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B)|
 |llama3_1-70b-instruct|[LLM-Research/Meta-Llama-3.1-70B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2714;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct)|
 |llama3_1-70b-instruct-fp8|[LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8/summary)|q_proj, k_proj, v_proj|llama3|&#x2714;|&#x2714;|&#x2718;|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct-FP8](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-FP8)|
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
@@ -174,6 +174,9 @@ class ModelType:
     # llama3.1
     llama3_1_8b = 'llama3_1-8b'
     llama3_1_8b_instruct = 'llama3_1-8b-instruct'
+    llama3_1_8b_instruct_awq = 'llama3_1-8b-instruct-awq'
+    llama3_1_8b_instruct_gptq_int4 = 'llama3_1-8b-instruct-gptq-int4'
+    llama3_1_8b_instruct_bnb = 'llama3_1-8b-instruct-bnb'
     llama3_1_70b = 'llama3_1-70b'
     llama3_1_70b_instruct = 'llama3_1-70b-instruct'
     llama3_1_70b_instruct_fp8 = 'llama3_1-70b-instruct-fp8'
@@ -4467,6 +4470,37 @@ def get_model_tokenizer_deepseek_vl(model_dir: str,
     requires=['transformers>=4.43'],
     ignore_file_pattern=[r'.+\.pth$'],
     hf_model_id='meta-llama/Meta-Llama-3.1-70B')
+@register_model(
+    ModelType.llama3_1_8b_instruct_bnb,
+    'LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4',
+    LoRATM.llama,
+    TemplateType.llama3,
+    support_flash_attn=True,
+    support_vllm=True,
+    requires=['transformers>=4.43', 'bitsandbytes'],
+    hf_model_id='hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4')
+@register_model(
+    ModelType.llama3_1_8b_instruct_gptq_int4,
+    'LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4',
+    LoRATM.llama,
+    TemplateType.llama3,
+    support_flash_attn=True,
+    support_vllm=True,
+    requires=['transformers>=4.43', 'auto_gptq'],
+    torch_dtype=torch.float16,
+    function_kwargs={'gptq_bits': 4},
+    hf_model_id='hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4')
+@register_model(
+    ModelType.llama3_1_8b_instruct_awq,
+    'LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4',
+    LoRATM.llama,
+    TemplateType.llama3,
+    support_flash_attn=True,
+    support_vllm=True,
+    requires=['transformers>=4.43', 'autoawq'],
+    torch_dtype=torch.float16,
+    function_kwargs={'is_awq': True},
+    hf_model_id='hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4')
 @register_model(
     ModelType.llama3_1_8b_instruct,
     'LLM-Research/Meta-Llama-3.1-8B-Instruct',