Skip to content

Commit 3fad431

Browse files
authored
support more models (#1552)
1 parent 9d71d48 commit 3fad431

File tree

3 files changed

+40
-0
lines changed

3 files changed

+40
-0
lines changed

docs/source/LLM/支持的模型和数据集.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@
133133
|llama3-70b-instruct-awq|[swift/Meta-Llama-3-70B-Instruct-AWQ](https://modelscope.cn/models/swift/Meta-Llama-3-70B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|autoawq|-|[study-hjt/Meta-Llama-3-70B-Instruct-AWQ](https://huggingface.co/study-hjt/Meta-Llama-3-70B-Instruct-AWQ)|
134134
|llama3_1-8b|[LLM-Research/Meta-Llama-3.1-8B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)|
135135
|llama3_1-8b-instruct|[LLM-Research/Meta-Llama-3.1-8B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)|
136+
|llama3_1-8b-instruct-awq|[LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4)|
137+
|llama3_1-8b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4)|
138+
|llama3_1-8b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4)|
136139
|llama3_1-70b|[LLM-Research/Meta-Llama-3.1-70B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B)|
137140
|llama3_1-70b-instruct|[LLM-Research/Meta-Llama-3.1-70B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct)|
138141
|llama3_1-70b-instruct-fp8|[LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct-FP8](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-FP8)|

docs/source_en/LLM/Supported-models-datasets.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ The table below introduces all models supported by SWIFT:
133133
|llama3-70b-instruct-awq|[swift/Meta-Llama-3-70B-Instruct-AWQ](https://modelscope.cn/models/swift/Meta-Llama-3-70B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|autoawq|-|[study-hjt/Meta-Llama-3-70B-Instruct-AWQ](https://huggingface.co/study-hjt/Meta-Llama-3-70B-Instruct-AWQ)|
134134
|llama3_1-8b|[LLM-Research/Meta-Llama-3.1-8B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B)|
135135
|llama3_1-8b-instruct|[LLM-Research/Meta-Llama-3.1-8B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)|
136+
|llama3_1-8b-instruct-awq|[LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4)|
137+
|llama3_1-8b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4)|
138+
|llama3_1-8b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4)|
136139
|llama3_1-70b|[LLM-Research/Meta-Llama-3.1-70B](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B)|
137140
|llama3_1-70b-instruct|[LLM-Research/Meta-Llama-3.1-70B-Instruct](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct)|
138141
|llama3_1-70b-instruct-fp8|[LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-70B-Instruct-FP8/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|transformers>=4.43|-|[meta-llama/Meta-Llama-3.1-70B-Instruct-FP8](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct-FP8)|

swift/llm/utils/model.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,9 @@ class ModelType:
174174
# llama3.1
175175
llama3_1_8b = 'llama3_1-8b'
176176
llama3_1_8b_instruct = 'llama3_1-8b-instruct'
177+
llama3_1_8b_instruct_awq = 'llama3_1-8b-instruct-awq'
178+
llama3_1_8b_instruct_gptq_int4 = 'llama3_1-8b-instruct-gptq-int4'
179+
llama3_1_8b_instruct_bnb = 'llama3_1-8b-instruct-bnb'
177180
llama3_1_70b = 'llama3_1-70b'
178181
llama3_1_70b_instruct = 'llama3_1-70b-instruct'
179182
llama3_1_70b_instruct_fp8 = 'llama3_1-70b-instruct-fp8'
@@ -4467,6 +4470,37 @@ def get_model_tokenizer_deepseek_vl(model_dir: str,
44674470
requires=['transformers>=4.43'],
44684471
ignore_file_pattern=[r'.+\.pth$'],
44694472
hf_model_id='meta-llama/Meta-Llama-3.1-70B')
4473+
@register_model(
4474+
ModelType.llama3_1_8b_instruct_bnb,
4475+
'LLM-Research/Meta-Llama-3.1-8B-Instruct-BNB-NF4',
4476+
LoRATM.llama,
4477+
TemplateType.llama3,
4478+
support_flash_attn=True,
4479+
support_vllm=True,
4480+
requires=['transformers>=4.43', 'bitsandbytes'],
4481+
hf_model_id='hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4')
4482+
@register_model(
4483+
ModelType.llama3_1_8b_instruct_gptq_int4,
4484+
'LLM-Research/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4',
4485+
LoRATM.llama,
4486+
TemplateType.llama3,
4487+
support_flash_attn=True,
4488+
support_vllm=True,
4489+
requires=['transformers>=4.43', 'auto_gptq'],
4490+
torch_dtype=torch.float16,
4491+
function_kwargs={'gptq_bits': 4},
4492+
hf_model_id='hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4')
4493+
@register_model(
4494+
ModelType.llama3_1_8b_instruct_awq,
4495+
'LLM-Research/Meta-Llama-3.1-8B-Instruct-AWQ-INT4',
4496+
LoRATM.llama,
4497+
TemplateType.llama3,
4498+
support_flash_attn=True,
4499+
support_vllm=True,
4500+
requires=['transformers>=4.43', 'autoawq'],
4501+
torch_dtype=torch.float16,
4502+
function_kwargs={'is_awq': True},
4503+
hf_model_id='hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4')
44704504
@register_model(
44714505
ModelType.llama3_1_8b_instruct,
44724506
'LLM-Research/Meta-Llama-3.1-8B-Instruct',

0 commit comments

Comments
 (0)