
Commit 29ca8f5

Merge commit 'a60c2c8f9eb21b2dd108a2a5bdfc88fd2ad8babb' into release/1.5
* commit 'a60c2c8f9eb21b2dd108a2a5bdfc88fd2ad8babb':
  fix_ziya_template_bug (#303)
  fix a bug may cause module on gpu throws error (#302)
  fix text label (#301)
  Support studio (#300)
  fix chatglm3 template bug (#298)

2 parents: d9756cd + a60c2c8

File tree

20 files changed, +261 -39 lines changed
Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+# Experimental environment: A100
+PYTHONPATH=../../.. \
+CUDA_VISIBLE_DEVICES=0 \
+python llm_infer.py \
+    --ckpt_dir "output/llama2-13b-chat/vx_xxx/checkpoint-xxx" \
+    --load_dataset_config true \
+    --max_length 4096 \
+    --max_new_tokens 2048 \
+    --temperature 0.1 \
+    --top_p 0.7 \
+    --repetition_penalty 1.05 \
+    --do_sample true \
+    --merge_lora_and_save false \
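For reference, the sampling flags in this script map one-to-one onto a standard transformers GenerationConfig. A minimal sketch (illustrative only, not swift's internal wiring):

from transformers import GenerationConfig

# Mirrors the CLI flags in the script above.
generation_config = GenerationConfig(
    max_new_tokens=2048,      # --max_new_tokens
    temperature=0.1,          # --temperature
    top_p=0.7,                # --top_p
    repetition_penalty=1.05,  # --repetition_penalty
    do_sample=True,           # --do_sample
)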
Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+# Experimental environment: 2 * A100
+# 2 * 37GB GPU memory
+nproc_per_node=2
+
+PYTHONPATH=../../.. \
+CUDA_VISIBLE_DEVICES=0,1 \
+torchrun \
+    --nproc_per_node=$nproc_per_node \
+    --master_port 29500 \
+    llm_sft.py \
+    --model_id_or_path modelscope/Llama-2-13b-chat-ms \
+    --model_revision master \
+    --sft_type longlora \
+    --tuner_backend swift \
+    --template_type llama \
+    --dtype AUTO \
+    --output_dir output \
+    --ddp_backend nccl \
+    --dataset leetcode-python-en \
+    --train_dataset_sample -1 \
+    --num_train_epochs 1 \
+    --max_length 4096 \
+    --check_dataset_strategy warning \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0.05 \
+    --lora_target_modules ALL \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0.01 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --push_to_hub false \
+    --hub_model_id llama2-13b-chat-longlora \
+    --hub_private_repo true \
+    --hub_token 'your-sdk-token' \
+    --deepspeed_config_path 'ds_config/zero2.json' \
+    --save_only_model true \
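The arithmetic in --gradient_accumulation_steps $(expr 16 / $nproc_per_node) keeps the global batch size at 16 regardless of how many GPUs run data-parallel. A quick Python sketch of the invariant:

# With 2 GPUs: 16 // 2 = 8 accumulation steps per process.
nproc_per_node = 2
batch_size = 1
gradient_accumulation_steps = 16 // nproc_per_node
# global batch = per-device batch * accumulation steps * processes
assert batch_size * gradient_accumulation_steps * nproc_per_node == 16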
Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+# Experimental environment: V100, A10, 3090
+PYTHONPATH=../../.. \
+CUDA_VISIBLE_DEVICES=0 \
+python llm_infer.py \
+    --ckpt_dir "output/qwen-7b-chat-int4/vx_xxx/checkpoint-xxx" \
+    --load_dataset_config true \
+    --max_length 4096 \
+    --use_flash_attn false \
+    --max_new_tokens 2048 \
+    --temperature 0.1 \
+    --top_p 0.7 \
+    --repetition_penalty 1.05 \
+    --do_sample true \
+    --merge_lora_and_save false \
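--use_flash_attn false matches the hardware note at the top: FlashAttention kernels require an Ampere-or-newer GPU (compute capability 8.0+), which the V100 (7.0) lacks. A hedged sketch of the kind of capability check one might do (not swift's code):

import torch

# FlashAttention needs compute capability >= 8.0 (Ampere or newer);
# V100 reports 7.0, so flash attention must stay off there.
if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability()
    print(f'compute capability {major}.{minor}, flash_attn ok: {major >= 8}')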
Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+# Experimental environment: V100, A10, 3090
+# 14GB GPU memory
+PYTHONPATH=../../.. \
+CUDA_VISIBLE_DEVICES=0 \
+python llm_sft.py \
+    --model_id_or_path qwen/Qwen-7B-Chat-Int4 \
+    --model_revision master \
+    --sft_type qalora \
+    --tuner_backend swift \
+    --template_type qwen \
+    --dtype fp16 \
+    --output_dir output \
+    --dataset leetcode-python-en \
+    --train_dataset_sample -1 \
+    --num_train_epochs 1 \
+    --max_length 4096 \
+    --check_dataset_strategy warning \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0.05 \
+    --lora_target_modules ALL \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0.01 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --use_flash_attn false \
+    --push_to_hub false \
+    --hub_model_id qwen-7b-chat-int4-qalora \
+    --hub_private_repo true \
+    --hub_token 'your-sdk-token' \
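The LoRA hyperparameters here (--lora_rank 8, --lora_alpha 32, --lora_dropout_p 0.05) are the usual rank/alpha/dropout triple. For comparison, the equivalent peft configuration would look like the sketch below; this is only an analogy, since --tuner_backend swift uses swift's own tuner implementation rather than peft:

from peft import LoraConfig

# Equivalent of --lora_rank 8 --lora_alpha 32 --lora_dropout_p 0.05.
# The adapter scaling factor is lora_alpha / r = 32 / 8 = 4.
lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05)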

swift/llm/sft.py

Lines changed: 4 additions & 1 deletion

@@ -181,6 +181,8 @@ def llm_sft(args: SftArguments) -> Dict[str, Union[str, Any]]:
         greater_is_better=args.predict_with_generate,
         sortish_sampler=True,
         optim=args.optim,
+        adam_beta1=args.adam_beta1,
+        adam_beta2=args.adam_beta2,
         hub_model_id=args.hub_model_id,
         hub_private_repo=args.hub_private_repo,
         push_hub_strategy=args.push_hub_strategy,
@@ -200,7 +202,8 @@ def llm_sft(args: SftArguments) -> Dict[str, Union[str, Any]]:
         disable_tqdm=args.disable_tqdm,
         save_on_each_node=args.save_on_each_node,
         acc_strategy=args.acc_strategy,
-        save_safetensors=args.save_safetensors)
+        save_safetensors=args.save_safetensors,
+        logging_first_step=True)

     if args.gradient_checkpointing:
         model.config.use_cache = False  # fix transformers==4.36
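adam_beta1, adam_beta2, and logging_first_step are standard Hugging Face TrainingArguments fields, so the new keyword arguments pass straight through to the trainer. A self-contained sketch of the same wiring (plain transformers, not swift's Seq2SeqTrainingArguments subclass):

from transformers import Seq2SeqTrainingArguments

# adam_beta1/adam_beta2 parameterize the AdamW optimizer the trainer builds;
# logging_first_step=True emits metrics at step 1 instead of waiting for the
# first logging_steps interval.
training_args = Seq2SeqTrainingArguments(
    output_dir='output',
    adam_beta1=0.9,
    adam_beta2=0.999,
    logging_first_step=True,
)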

swift/llm/utils/argument.py

Lines changed: 2 additions & 0 deletions

@@ -104,6 +104,8 @@ class SftArguments:
     # if max_steps >= 0, override num_train_epochs
     max_steps: int = -1
     optim: str = 'adamw_torch'
+    adam_beta1: float = 0.9
+    adam_beta2: float = 0.999
     learning_rate: Optional[float] = None
     weight_decay: float = 0.01
     gradient_accumulation_steps: Optional[int] = None
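The new fields default to AdamW's standard betas and end up as the optimizer's (beta1, beta2) moment-decay pair. A sketch of that relationship, assuming the stock PyTorch AdamW that optim='adamw_torch' selects:

import torch

model = torch.nn.Linear(8, 8)  # placeholder model for illustration
# adam_beta1/adam_beta2 become the betas tuple of AdamW.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, betas=(0.9, 0.999))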

swift/llm/utils/dataset.py

Lines changed: 9 additions & 6 deletions

@@ -409,7 +409,7 @@ def _repair_agent_conversations(conversations: str,

 advertise_gen_prompt = """Task: Generating advertisements based on keywords.
 Keywords: {query}
-Advertisements: """
+Advertisements:"""
 register_dataset(
     DatasetName.advertise_gen_zh,
     'lvjianjin/AdvertiseGen', ['train'], ['validation'],
@@ -513,7 +513,7 @@ def _preprocess_dureader_robust(dataset: HfDataset) -> HfDataset:
     prompt = """Task: Question Generation
 Context: {context}
 Answer: {answer}
-Question: """
+Question:"""
     query = []
     response = []
     for d in dataset:
@@ -850,7 +850,7 @@ def _preprocess_hc3(dataset: HfDataset) -> HfDataset:
 Question: {question}
 Answer: {answer}
 Category: Human, ChatGPT
-Output: """
+Output:"""
     query = []
     response = []
     for d in dataset:
@@ -978,6 +978,9 @@ def add_self_cognition_dataset(
     return concatenate_datasets([train_dataset, dataset])


+NoneType = type(None)
+
+
 def _check_dataset(
     dataset: Optional[None],
     check_dataset_strategy: Literal['none', 'discard', 'error', 'warning']
@@ -1003,7 +1006,7 @@ def _check_dataset(
                 continue
             else:
                 raise ValueError(f"d['response']: {d['response']}, i: {i}")
-        if has_query and not isinstance(d['response'], str):
+        if has_query and not isinstance(d['query'], (str, NoneType)):
             is_modified = True
             if check_dataset_strategy == 'discard':
                 continue
@@ -1012,7 +1015,7 @@ def _check_dataset(
                 continue
             else:
                 raise ValueError(f"d['query']: {d['query']}, i: {i}")
-        if has_history and not isinstance(d['history'], (list, type(None))):
+        if has_history and not isinstance(d['history'], (list, NoneType)):
             is_modified = True
             if check_dataset_strategy == 'discard':
                 continue
@@ -1021,7 +1024,7 @@ def _check_dataset(
                 continue
             else:
                 raise ValueError(f"d['history']: {d['history']}, i: {i}")
-        if has_system and not isinstance(d['system'], str):
+        if has_system and not isinstance(d['system'], (str, NoneType)):
             is_modified = True
             if check_dataset_strategy == 'discard':
                 continue
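Two things change in _check_dataset: the query check previously inspected d['response'] (a copy-paste bug), and query/system may now legitimately be None, which the shared NoneType alias makes uniform across the field checks. The check in isolation:

NoneType = type(None)

def query_ok(value) -> bool:
    # A query may be absent (None) or a string; anything else is malformed.
    return isinstance(value, (str, NoneType))

assert query_ok('hello') and query_ok(None) and not query_ok(123)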

swift/llm/utils/preprocess.py

Lines changed: 1 addition & 1 deletion

@@ -232,7 +232,7 @@ def __init__(self, labels: List[str], task_name: str,
         self.prompt = f"""Task: {task_name}
 {inputs}
 Category: {category}
-Output: """
+Output:"""
         self.task_name = task_name
         self.is_pair_seq = is_pair_seq
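Dropping the trailing space after "Output:" mirrors the prompt fixes in dataset.py above: BPE tokenizers fold a leading space into the next word, so a prompt ending in "Output: " leaves a dangling space token and an awkward boundary for the first generated token. A small illustration with GPT-2's tokenizer (any BPE tokenizer shows the same effect):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('gpt2')
# The trailing space becomes its own token ('Ġ') instead of merging
# with the model's first generated word.
print(tok.tokenize('Output:'))
print(tok.tokenize('Output: '))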

swift/llm/utils/template.py

Lines changed: 4 additions & 4 deletions

@@ -618,13 +618,13 @@ def register_template(template_type: str,

 register_template(
     TemplateType.chatglm3,
-    Template([[64790, 64792]], [[64795], '\n {{QUERY}}', [64796], '\n '], [],
+    Template([[64790, 64792]], [[64795], '\n {{QUERY}}', [64796], '\n'], [],
              [['eos_token_id']], None,
              [[64790, 64792, 64794], '\n {{SYSTEM}}']))

 register_template(
     TemplateType.deepseek,
-    Template([['bos_token_id']], ['User: {{QUERY}}\n\nAssistant: '],
+    Template([['bos_token_id']], ['User: {{QUERY}}\n\nAssistant:'],
              [['eos_token_id']], [['eos_token_id']], None,
              [['bos_token_id'], '{{SYSTEM}}\n\n']))

@@ -660,7 +660,7 @@ def register_template(template_type: str,
 )
 register_template(
     TemplateType.openbuddy,
-    Template([['bos_token_id']], ['User: {{QUERY}}\nAssistant: '], ['\n'],
+    Template([['bos_token_id']], ['User: {{QUERY}}\nAssistant:'], ['\n'],
              [['eos_token_id']], OPENBUDDY_DEFAULT_SYSTEM,
              [['bos_token_id'], '{{SYSTEM}}\n\n']))

@@ -674,7 +674,7 @@ def register_template(template_type: str,
     [['eos_token_id']], [['eos_token_id']], ''))
 register_template(
     TemplateType.ziya,
-    Template([['bos_token_id', '{{SYSTEM}}']], ['<human>:{{QUERY}}\n<bot>:'],
+    Template([['bos_token_id'], '{{SYSTEM}}'], ['<human>:{{QUERY}}\n<bot>:'],
              ['\n'], [['eos_token_id']], ''))

 register_template(
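The ziya fix (fix_ziya_template_bug, #303) is structural: [['bos_token_id', '{{SYSTEM}}']] buried the system placeholder inside a list the template engine treats as special-token names, while the corrected [['bos_token_id'], '{{SYSTEM}}'] is a sequence of two parts, a token-id list followed by a literal string. A simplified sketch of that assumed two-kind rendering scheme (not swift's actual implementation):

def render_prefix(parts, special_ids, system):
    # Each part is either a list of special-token names or a template string.
    ids = []
    for part in parts:
        if isinstance(part, list):
            ids.extend(special_ids[name] for name in part)
        else:
            text = part.replace('{{SYSTEM}}', system)
            ids.extend(ord(c) for c in text)  # stand-in for real tokenization
    return ids

# The old form would have looked up '{{SYSTEM}}' as a special-token name and failed.
render_prefix([['bos_token_id'], '{{SYSTEM}}'], {'bos_token_id': 1}, 'Be helpful.')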

swift/tuners/base.py

Lines changed: 1 addition & 1 deletion

@@ -461,7 +461,7 @@ def set_active_adapters(self,
                             adapter_names: Union[List[str], str],
                             offload=None):
         if not adapter_names:
-            return
+            adapter_names = []

         if isinstance(adapter_names, str):
             adapter_names = [adapter_names]
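Replacing the early return with adapter_names = [] changes what an empty argument means: instead of silently doing nothing, the call now falls through to the logic below, so passing nothing deactivates every adapter. The normalization pattern in isolation (a sketch, with a stubbed deactivation step):

def set_active(adapter_names, all_adapters):
    # Normalize instead of returning early: an empty value now means
    # "no adapters active", so the code below still runs and can
    # deactivate adapters that were previously active.
    if not adapter_names:
        adapter_names = []
    if isinstance(adapter_names, str):
        adapter_names = [adapter_names]
    return {name: name in adapter_names for name in all_adapters}

assert set_active(None, ['a', 'b']) == {'a': False, 'b': False}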
