Skip to content

Commit e4c792f

Browse files
authored
Support qwen1.5-110b awq (#821)
1 parent 052d4f1 commit e4c792f

File tree

4 files changed

+19
-4
lines changed

4 files changed

+19
-4
lines changed

docs/source/LLM/支持的模型和数据集.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
|qwen1half-14b-chat-awq|[qwen/Qwen1.5-14B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-14B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-AWQ)|
7575
|qwen1half-32b-chat-awq|[qwen/Qwen1.5-32B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-32B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-AWQ)|
7676
|qwen1half-72b-chat-awq|[qwen/Qwen1.5-72B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-72B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-AWQ)|
77+
|qwen1half-110b-chat-awq|[qwen/Qwen1.5-110B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-110B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-AWQ)|
7778
|codeqwen1half-7b-chat-awq|[qwen/CodeQwen1.5-7B-Chat-AWQ](https://modelscope.cn/models/qwen/CodeQwen1.5-7B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/CodeQwen1.5-7B-Chat-AWQ](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat-AWQ)|
7879
|qwen-vl|[qwen/Qwen-VL](https://modelscope.cn/models/qwen/Qwen-VL/summary)|c_attn|default-generation|✔|✘||multi-modal, vision|[Qwen/Qwen-VL](https://huggingface.co/Qwen/Qwen-VL)|
7980
|qwen-vl-chat|[qwen/Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)|c_attn|qwen|✔|✘||multi-modal, vision|[Qwen/Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)|

docs/source_en/LLM/Supported-models-datasets.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ The table below introduces all models supported by SWIFT:
7474
|qwen1half-14b-chat-awq|[qwen/Qwen1.5-14B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-14B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-14B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-14B-Chat-AWQ)|
7575
|qwen1half-32b-chat-awq|[qwen/Qwen1.5-32B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-32B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-32B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-32B-Chat-AWQ)|
7676
|qwen1half-72b-chat-awq|[qwen/Qwen1.5-72B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-72B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-72B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-72B-Chat-AWQ)|
77+
|qwen1half-110b-chat-awq|[qwen/Qwen1.5-110B-Chat-AWQ](https://modelscope.cn/models/qwen/Qwen1.5-110B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/Qwen1.5-110B-Chat-AWQ](https://huggingface.co/Qwen/Qwen1.5-110B-Chat-AWQ)|
7778
|codeqwen1half-7b-chat-awq|[qwen/CodeQwen1.5-7B-Chat-AWQ](https://modelscope.cn/models/qwen/CodeQwen1.5-7B-Chat-AWQ/summary)|q_proj, k_proj, v_proj|qwen|✔|✔|transformers>=4.37, autoawq|-|[Qwen/CodeQwen1.5-7B-Chat-AWQ](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat-AWQ)|
7879
|qwen-vl|[qwen/Qwen-VL](https://modelscope.cn/models/qwen/Qwen-VL/summary)|c_attn|default-generation|✔|✘||multi-modal, vision|[Qwen/Qwen-VL](https://huggingface.co/Qwen/Qwen-VL)|
7980
|qwen-vl-chat|[qwen/Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)|c_attn|qwen|✔|✘||multi-modal, vision|[Qwen/Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat)|

swift/llm/utils/model.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class ModelType:
106106
qwen1half_14b_chat_awq = 'qwen1half-14b-chat-awq'
107107
qwen1half_32b_chat_awq = 'qwen1half-32b-chat-awq'
108108
qwen1half_72b_chat_awq = 'qwen1half-72b-chat-awq'
109+
qwen1half_110b_chat_awq = 'qwen1half-110b-chat-awq'
109110
codeqwen1half_7b_chat_awq = 'codeqwen1half-7b-chat-awq'
110111

111112
# qwen-vl
@@ -1994,6 +1995,17 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
19941995
requires=['transformers>=4.37', 'autoawq'],
19951996
torch_dtype=torch.float16,
19961997
hf_model_id='Qwen/Qwen1.5-72B-Chat-AWQ')
1998+
@register_model(
1999+
ModelType.qwen1half_110b_chat_awq,
2000+
'qwen/Qwen1.5-110B-Chat-AWQ',
2001+
LoRATM.qwen1half,
2002+
TemplateType.qwen,
2003+
support_flash_attn=True,
2004+
support_vllm=True,
2005+
function_kwargs={'is_awq': True},
2006+
requires=['transformers>=4.37', 'autoawq'],
2007+
torch_dtype=torch.float16,
2008+
hf_model_id='Qwen/Qwen1.5-110B-Chat-AWQ')
19972009
@register_model(
19982010
ModelType.codeqwen1half_7b_chat_awq,
19992011
'qwen/CodeQwen1.5-7B-Chat-AWQ',

swift/llm/utils/utils.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,14 +364,15 @@ def find_all_linears(model: Module, quantization_bit: int,
364364
else:
365365
linear_cls = [Linear]
366366
if 'int4' in model_type or 'int8' in model_type:
367-
from bitsandbytes.nn import Linear4bit
368367
from peft.utils import get_auto_gptq_quant_linear, get_quantization_config
369368
gptq_quantization_config = get_quantization_config(model, 'gptq')
370369
AutoGPTQQuantLinear = get_auto_gptq_quant_linear(
371370
gptq_quantization_config)
372-
linear_cls = [Linear4bit]
373-
if AutoGPTQQuantLinear is not None:
374-
linear_cls.append(AutoGPTQQuantLinear)
371+
if AutoGPTQQuantLinear is None:
372+
from bitsandbytes.nn import Linear4bit
373+
linear_cls = [Linear4bit]
374+
else:
375+
linear_cls = [AutoGPTQQuantLinear]
375376
if 'awq' in model_type:
376377
from awq.modules.linear import WQLinear_GEMM
377378
linear_cls.append(WQLinear_GEMM)

0 commit comments

Comments
 (0)