Skip to content

Commit 052d4f1

Browse files
authored
support qwen110b gptq int4 (#818)
1 parent 0247075 commit 052d4f1

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

docs/source/LLM/支持的模型和数据集.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959
|qwen1half-14b-chat-int4|[qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4)|
6060
|qwen1half-32b-chat-int4|[qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-GPTQ-Int4)|
6161
|qwen1half-72b-chat-int4|[qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4)|
62+
|qwen1half-110b-chat-int4|[qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4)|
6263
|qwen1half-0_5b-chat-int8|[qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8)|
6364
|qwen1half-1_8b-chat-int8|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8)|
6465
|qwen1half-4b-chat-int8|[qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int8)|

docs/source_en/LLM/Supported-models-datasets.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ The table below introduces all models supported by SWIFT:
5959
|qwen1half-14b-chat-int4|[qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-14B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-GPTQ-Int4)|
6060
|qwen1half-32b-chat-int4|[qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-32B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-GPTQ-Int4)|
6161
|qwen1half-72b-chat-int4|[qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-72B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-GPTQ-Int4)|
62+
|qwen1half-110b-chat-int4|[qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-110B-Chat-GPTQ-Int4](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-GPTQ-Int4)|
6263
|qwen1half-0_5b-chat-int8|[qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8)|
6364
|qwen1half-1_8b-chat-int8|[qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8)|
6465
|qwen1half-4b-chat-int8|[qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen|✔|✘|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen1.5-4B-Chat-GPTQ-Int8](https://huggingface.co/Qwen/Qwen1.5-4B-Chat-GPTQ-Int8)|

swift/llm/utils/model.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ class ModelType:
8989
qwen1half_14b_chat_int4 = 'qwen1half-14b-chat-int4'
9090
qwen1half_32b_chat_int4 = 'qwen1half-32b-chat-int4'
9191
qwen1half_72b_chat_int4 = 'qwen1half-72b-chat-int4'
92+
qwen1half_110b_chat_int4 = 'qwen1half-110b-chat-int4'
9293
qwen1half_0_5b_chat_int8 = 'qwen1half-0_5b-chat-int8'
9394
qwen1half_1_8b_chat_int8 = 'qwen1half-1_8b-chat-int8'
9495
qwen1half_4b_chat_int8 = 'qwen1half-4b-chat-int8'
@@ -2232,6 +2233,17 @@ def get_model_tokenizer_qwen1half(model_dir: str,
22322233
support_flash_attn=True,
22332234
support_vllm=True,
22342235
hf_model_id='Qwen/Qwen1.5-72B-Chat-GPTQ-Int4')
2236+
@register_model(
2237+
ModelType.qwen1half_110b_chat_int4,
2238+
'qwen/Qwen1.5-110B-Chat-GPTQ-Int4',
2239+
LoRATM.qwen1half,
2240+
TemplateType.qwen,
2241+
requires=['auto_gptq>=0.5', 'transformers>=4.37'],
2242+
torch_dtype=torch.float16,
2243+
function_kwargs={'gptq_bits': 4},
2244+
support_flash_attn=True,
2245+
support_vllm=True,
2246+
hf_model_id='Qwen/Qwen1.5-110B-Chat-GPTQ-Int4')
22352247
@register_model(
22362248
ModelType.qwen1half_72b_chat_int8,
22372249
'qwen/Qwen1.5-72B-Chat-GPTQ-Int8',

0 commit comments

Comments
 (0)