
Commit 06d9d19

Fix issue 342 (#359)
1 parent 15296e8 commit 06d9d19

7 files changed: +18 lines, -6 lines

README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -218,7 +218,7 @@ app_ui_main(infer_args)
 - [zephyr](https://github.com/huggingface/alignment-handbook) series: zephyr-7b-beta-chat.
 - [ziya](https://github.com/IDEA-CCNL/Fengshenbang-LM) series: ziya2-13b, ziya2-13b-chat.
 - [skywork](https://github.com/SkyworkAI/Skywork) series: skywork-13b, skywork-13b-chat.
-- other: [polylm-13b](https://github.com/DAMO-NLP-MT/PolyLM), [seqgpt-560m](https://github.com/Alibaba-NLP/SeqGPT), [sus-34b-chat](https://github.com/SUSTech-IDEA/SUS-Chat), [openbmb-minicpm-2b](https://github.com/OpenBMB/CPM-Bee).
+- other: [polylm-13b](https://github.com/DAMO-NLP-MT/PolyLM), [seqgpt-560m](https://github.com/Alibaba-NLP/SeqGPT), [sus-34b-chat](https://github.com/SUSTech-IDEA/SUS-Chat), [openbmb-minicpm-2b-chat](https://github.com/OpenBMB/mlc-MiniCPM).
 - Financial:
 - [tongyi-finance](https://github.com/QwenLM/Qwen) series: tongyi-finance-14b, tongyi-finance-14b-chat, tongyi-finance-14b-chat-int4.
 - Coding:
```

README_CN.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -218,7 +218,7 @@ app_ui_main(infer_args)
 - [zephyr](https://github.com/huggingface/alignment-handbook) 系列: zephyr-7b-beta-chat.
 - [ziya](https://github.com/IDEA-CCNL/Fengshenbang-LM) 系列: ziya2-13b, ziya2-13b-chat.
 - [skywork](https://github.com/SkyworkAI/Skywork) 系列: skywork-13b, skywork-13b-chat.
-- other: [polylm-13b](https://github.com/DAMO-NLP-MT/PolyLM), [seqgpt-560m](https://github.com/Alibaba-NLP/SeqGPT), [sus-34b-chat](https://github.com/SUSTech-IDEA/SUS-Chat), [openbmb-minicpm-2b](https://github.com/OpenBMB/CPM-Bee).
+- other: [polylm-13b](https://github.com/DAMO-NLP-MT/PolyLM), [seqgpt-560m](https://github.com/Alibaba-NLP/SeqGPT), [sus-34b-chat](https://github.com/SUSTech-IDEA/SUS-Chat), [openbmb-minicpm-2b-chat](https://github.com/OpenBMB/mlc-MiniCPM).
 - 金融:
 - [tongyi-finance](https://github.com/QwenLM/Qwen) 系列: tongyi-finance-14b, tongyi-finance-14b-chat, tongyi-finance-14b-chat-int4.
 - 代码:
```

docs/source/LLM/支持的模型和数据集.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -124,7 +124,7 @@
 |zephyr-7b-beta-chat|[modelscope/zephyr-7b-beta](https://modelscope.cn/models/modelscope/zephyr-7b-beta/summary)|q_proj, k_proj, v_proj|zephyr|✔|✔|transformers>=4.34|
 |polylm-13b|[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation/summary)|c_attn|default-generation|✘|✘||
 |seqgpt-560m|[damo/nlp_seqgpt-560m](https://modelscope.cn/models/damo/nlp_seqgpt-560m/summary)|query_key_value|default-generation|✘|✔||
-|openbmb-minicpm-2b|[OpenBMB/miniCPM-bf16](https://modelscope.cn/models/OpenBMB/miniCPM-bf16/summary)|q_proj, k_proj, v_proj|openbmb|✔|✘||
+|openbmb-minicpm-2b-chat|[OpenBMB/miniCPM-bf16](https://modelscope.cn/models/OpenBMB/miniCPM-bf16/summary)|q_proj, k_proj, v_proj|openbmb|✔|✘||
 |sus-34b-chat|[SUSTC/SUS-Chat-34B](https://modelscope.cn/models/SUSTC/SUS-Chat-34B/summary)|q_proj, k_proj, v_proj|sus|✔|✔||
 |tongyi-finance-14b|[TongyiFinance/Tongyi-Finance-14B](https://modelscope.cn/models/TongyiFinance/Tongyi-Finance-14B/summary)|c_attn|default-generation|✔|✔||
 |tongyi-finance-14b-chat|[TongyiFinance/Tongyi-Finance-14B-Chat](https://modelscope.cn/models/TongyiFinance/Tongyi-Finance-14B-Chat/summary)|c_attn|qwen|✔|✔||
```

swift/llm/utils/model.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -175,7 +175,7 @@ class ModelType:
     # other
     polylm_13b = 'polylm-13b'
     seqgpt_560m = 'seqgpt-560m'
-    openbmb_minicpm_2b = 'openbmb-minicpm-2b'
+    openbmb_minicpm_2b_chat = 'openbmb-minicpm-2b-chat'
     sus_34b_chat = 'sus-34b-chat'
 
     # domain-specific
@@ -1851,7 +1851,7 @@ def get_model_tokenizer_yi_vl(model_dir: str,
 
 
 @register_model(
-    ModelType.openbmb_minicpm_2b,
+    ModelType.openbmb_minicpm_2b_chat,
     'OpenBMB/miniCPM-bf16',
     LoRATM.llama2,
     TemplateType.openbmb,
```
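
The rename from `openbmb_minicpm_2b` to `openbmb_minicpm_2b_chat` carries through to anything that selects the model by its `ModelType` constant. Below is a minimal usage sketch, assuming swift's documented `infer_main`/`InferArguments` entry points; the call pattern mirrors the README's `app_ui_main(infer_args)` flow and is not part of this commit:

```python
# Hedged sketch: inference with the renamed model type. InferArguments and
# infer_main are assumed from swift's documented API; all other parameters
# are left at their defaults for brevity.
from swift.llm import InferArguments, ModelType, infer_main

infer_args = InferArguments(model_type=ModelType.openbmb_minicpm_2b_chat)
infer_main(infer_args)
```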

swift/llm/utils/utils.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -365,7 +365,7 @@ def find_all_linear_for_lora(model: Module, quantization_bit: int,
     lora_module_names = set()
     for name, module in model.named_modules():
         if isinstance(module, linear_cls):
-            module_name = name.split('.')[-1]
+            module_name = '.'.join(name.split('.')[-2:])
             if head_module_name not in module_name:
                 lora_module_names.add(module_name)
     return list(lora_module_names)
```
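
With this change, `find_all_linear_for_lora` records one level of parent context for each matched linear layer instead of only the leaf name, so an entry such as `attn.out_proj` stays distinguishable from other `Linear` leaves, while the substring check against `head_module_name` keeps filtering the head as before. A small sketch of the old versus new naming (the toy module layout is illustrative, not taken from the commit):

```python
# Toy illustration of the one-segment vs. two-segment naming used by
# find_all_linear_for_lora before and after this commit.
import torch.nn as nn

model = nn.ModuleDict({
    'attn': nn.MultiheadAttention(embed_dim=8, num_heads=2),
    'lm_head': nn.Linear(8, 8),
})

old_names, new_names = set(), set()
for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        old_names.add(name.split('.')[-1])             # before: leaf name only
        new_names.add('.'.join(name.split('.')[-2:]))  # after: parent.leaf

print(old_names)  # {'out_proj', 'lm_head'} (order may vary)
print(new_names)  # {'attn.out_proj', 'lm_head'} (order may vary)
```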

swift/tuners/lora_layers.py

Lines changed: 6 additions & 0 deletions

```diff
@@ -399,6 +399,9 @@ def _create_and_replace(
             )
             self._convert_dtype(target, lora_config.lora_dtype)
         elif isinstance(target, linear_types):
+            if target.__class__.__name__ == 'NonDynamicallyQuantizableLinear':
+                # Fix issue: https://github.com/modelscope/swift/issues/342
+                return
             target.update_layer(
                 adapter_name,
                 r,
@@ -496,6 +499,9 @@ def _create_new_module(lora_config, adapter_name, target, **kwargs):
                 enable_lora=lora_config.enable_lora,
                 **kwargs)
         elif isinstance(target_base_layer, torch.nn.Linear):
+            if target_base_layer.__class__.__name__ == 'NonDynamicallyQuantizableLinear':
+                # Fix issue: https://github.com/modelscope/swift/issues/342
+                return None
             if kwargs['fan_in_fan_out']:
                 warnings.warn(
                     'fan_in_fan_out is set to True but the target module is `torch.nn.Linear`. '
```
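
The new guard matters because `NonDynamicallyQuantizableLinear` is a subclass of `torch.nn.Linear` (it is, for instance, the `out_proj` inside `torch.nn.MultiheadAttention`), so the plain `isinstance` branches above cannot tell it apart from an ordinary linear layer; the class-name check skips it instead of wrapping it with LoRA, which is the situation reported in issue 342. A short check showing why the class name is the distinguishing signal:

```python
# NonDynamicallyQuantizableLinear passes an isinstance check against nn.Linear,
# which is why the commit filters it by class name before applying LoRA.
import torch.nn as nn

mha = nn.MultiheadAttention(embed_dim=8, num_heads=2)
print(isinstance(mha.out_proj, nn.Linear))  # True
print(type(mha.out_proj).__name__)          # 'NonDynamicallyQuantizableLinear'
```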

tests/llm/test_run.py

Lines changed: 6 additions & 0 deletions

```diff
@@ -50,6 +50,7 @@ def test_basic(self):
         sft_args = SftArguments(
             model_type=model_type,
             template_type='AUTO',
+            lora_target_modules='ALL',
             quantization_bit=quantization_bit,
             batch_size=2,
             eval_steps=5,
@@ -138,6 +139,7 @@ def test_vl_audio(self):
             template_type='AUTO',
             eval_steps=5,
             check_dataset_strategy='warning',
+            lora_target_modules='ALL',
             train_dataset_sample=200,
             dataset=[dataset],
             output_dir=output_dir,
@@ -245,6 +247,7 @@ def test_cogagent_instruct(self):
                 model_type=ModelType.cogagent_18b_instruct,
                 dataset=DatasetName.coco_mini_en_2,
                 train_dataset_sample=100,
+                lora_target_modules='ALL',
                 eval_steps=5,
                 quantization_bit=4))
         best_model_checkpoint = output['best_model_checkpoint']
@@ -263,6 +266,7 @@ def test_xcomposer_chat(self):
             SftArguments(
                 model_type=ModelType.internlm_xcomposer2_7b_chat,
                 dataset=DatasetName.coco_mini_en,
+                lora_target_modules='ALL',
                 train_dataset_sample=100,
                 eval_steps=5))
         best_model_checkpoint = output['best_model_checkpoint']
@@ -282,6 +286,7 @@ def test_yi_vl_6b_chat(self):
             SftArguments(
                 model_type=ModelType.yi_vl_6b_chat,
                 # dataset=DatasetName.capcha_images,
+                lora_target_modules='ALL',
                 train_dataset_sample=100,
                 eval_steps=5,
                 custom_train_dataset_path=[
@@ -303,6 +308,7 @@ def test_dpo(self):
         output = dpo_main(
             DPOArguments(
                 model_type=ModelType.qwen_1_8b_chat,
+                sft_type='full',
                 dataset=DatasetName.hh_rlhf,
                 train_dataset_sample=100,
                 eval_steps=5))
```