Skip to content

Commit 03b17dc

Browse files
A bunch of small features (#944)
1 parent 6e5b58a commit 03b17dc

File tree

8 files changed

+55
-30
lines changed

8 files changed

+55
-30
lines changed

swift/hub/api.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -624,9 +624,6 @@ def get_cookies():
624624
cookies = pickle.load(f)
625625
for cookie in cookies:
626626
if cookie.is_expired():
627-
logger.warning(
628-
'Authentication has expired, '
629-
'please re-login if you need to access private models or datasets.')
630627
return None
631628
return cookies
632629
return None

swift/hub/errors.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,7 @@ def handle_http_response(response, logger, cookies, model_id):
8080
response.raise_for_status()
8181
except HTTPError as error:
8282
if cookies is None: # code in [403] and
83-
logger.error(
84-
f'Authentication token does not exist, failed to access model {model_id} which may not exist or may be \
85-
private. Please login first.')
83+
pass
8684
message = _decode_response_error(response)
8785
raise HTTPError('Response details: %s' % message) from error
8886

swift/llm/sft.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ def llm_sft(args: SftArguments) -> Dict[str, Union[str, Any]]:
199199
template, val_dataset, args.max_length, lazy_tokenize=args.lazy_tokenize)
200200
dataset_info = {}
201201
if not args.lazy_tokenize:
202+
td0 = train_dataset[0]
203+
print_example(td0, tokenizer, {})
202204
dataset_info['train_dataset'] = stat_dataset(train_dataset)
203205
if val_dataset is not None:
204206
dataset_info['val_dataset'] = stat_dataset(val_dataset)

swift/trainers/mixin.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,13 +376,17 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None):
376376
self.model, output_dir, state_dict=state_dict, safe_serialization=save_safetensors)
377377
else:
378378
self.model.save_pretrained(output_dir, state_dict=state_dict, safe_serialization=save_safetensors)
379+
sft_args = getattr(self, 'sft_args', None)
379380
# tokenizer
380-
if self.tokenizer is not None:
381+
from swift import SWIFT_MAPPING
382+
addtional_module_tuners = [
383+
name.lower() for name, (config, cls) in SWIFT_MAPPING.items() if cls.has_additional_modules()
384+
]
385+
if self.tokenizer is not None and sft_args.sft_type not in addtional_module_tuners:
381386
self.tokenizer.save_pretrained(output_dir)
382387
# training_args.bin
383388
torch.save(self.args, os.path.join(output_dir, 'training_args.bin'))
384389
# additional files
385-
sft_args = getattr(self, 'sft_args', None)
386390
if sft_args is not None and sft_args.sft_type == 'full':
387391
additional_files = getattr(self.args, 'additional_saved_files', []) + ['preprocessor_config.json']
388392
if model_dir is not None:

swift/tuners/utils.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
import os
66
import shutil
77
import threading
8+
import uuid
89
from dataclasses import asdict, dataclass, field
910
from types import FunctionType
10-
from typing import Dict, List, Optional, OrderedDict, Union
11+
from typing import Dict, Optional, Union
1112

1213
import json
1314
import numpy as np
@@ -176,11 +177,15 @@ def get_activated_adapters(self):
176177

177178
class OffloadHelper:
178179

179-
sub_dir = 'offload_cache'
180-
cache_dir = os.path.join(get_cache_dir(), sub_dir)
181-
shutil.rmtree(cache_dir, ignore_errors=True)
182-
os.makedirs(cache_dir, exist_ok=True)
183-
index = {}
180+
def __init__(self):
181+
sub_dir = os.path.join('offload_cache', str(uuid.uuid4().hex))
182+
self.cache_dir = os.path.join(get_cache_dir(), sub_dir)
183+
shutil.rmtree(self.cache_dir, ignore_errors=True)
184+
os.makedirs(self.cache_dir, exist_ok=True)
185+
self.index = {}
186+
187+
def __del__(self):
188+
shutil.rmtree(self.cache_dir, ignore_errors=True)
184189

185190
@staticmethod
186191
def offload_weight(weight, weight_name, offload_folder, index=None):
@@ -221,26 +226,24 @@ def load_offloaded_weight(weight_file, weight_info):
221226

222227
return weight
223228

224-
@staticmethod
225-
def offload_disk(module: torch.nn.Module, adapter_name, module_key):
229+
def offload_disk(self, module: torch.nn.Module, adapter_name, module_key):
226230
key = adapter_name + ':' + module_key
227231
md5 = hashlib.md5(key.encode('utf-8')).hexdigest()
228-
sub_folder = os.path.join(OffloadHelper.cache_dir, md5)
232+
sub_folder = os.path.join(self.cache_dir, md5)
229233
os.makedirs(sub_folder, exist_ok=True)
230234
state_dict = module.state_dict()
231-
OffloadHelper.index[md5] = {}
235+
self.index[md5] = {}
232236
for key, tensor in state_dict.items():
233-
OffloadHelper.offload_weight(tensor, key, sub_folder, OffloadHelper.index[md5])
237+
OffloadHelper.offload_weight(tensor, key, sub_folder, self.index[md5])
234238

235-
@staticmethod
236-
def load_disk(module: torch.nn.Module, adapter_name, module_key):
239+
def load_disk(self, module: torch.nn.Module, adapter_name, module_key):
237240
key = adapter_name + ':' + module_key
238241
md5 = hashlib.md5(key.encode('utf-8')).hexdigest()
239-
sub_folder = os.path.join(OffloadHelper.cache_dir, md5)
242+
sub_folder = os.path.join(self.cache_dir, md5)
240243
state_dict = {}
241-
for key, value in OffloadHelper.index[md5].items():
244+
for key, value in self.index[md5].items():
242245
file = os.path.join(sub_folder, f'{key}.dat')
243-
state_dict[key] = OffloadHelper.load_offloaded_weight(file, OffloadHelper.index[md5][key])
246+
state_dict[key] = OffloadHelper.load_offloaded_weight(file, self.index[md5][key])
244247
if version.parse(torch.__version__) >= version.parse('2.1.0'):
245248
module.load_state_dict(state_dict, assign=True)
246249
else:
@@ -264,6 +267,8 @@ def load_disk(module: torch.nn.Module, adapter_name, module_key):
264267

265268
class SwiftAdapter:
266269

270+
offload_helper = OffloadHelper()
271+
267272
@staticmethod
268273
def prepare_model(model: torch.nn.Module, config: SwiftConfig, adapter_name: str) -> SwiftOutput:
269274
raise NotImplementedError
@@ -294,7 +299,7 @@ def offload(module: torch.nn.Module, adapter_name, module_key, offload: str):
294299
module.to('cpu')
295300
elif offload == 'meta':
296301
if str(device) != 'meta':
297-
OffloadHelper.offload_disk(module, adapter_name=adapter_name, module_key=module_key)
302+
SwiftAdapter.offload_helper.offload_disk(module, adapter_name=adapter_name, module_key=module_key)
298303
module.to('meta')
299304
else:
300305
raise NotImplementedError
@@ -309,7 +314,7 @@ def load(module: torch.nn.Module, adapter_name, module_key):
309314
module.to(module.origin_device)
310315
delattr(module, 'origin_device')
311316
elif str(device) == 'meta':
312-
OffloadHelper.load_disk(module, adapter_name=adapter_name, module_key=module_key)
317+
SwiftAdapter.offload_helper.load_disk(module, adapter_name=adapter_name, module_key=module_key)
313318
module.to(module.origin_device)
314319
delattr(module, 'origin_device')
315320

swift/ui/app.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,23 @@
1212
locale_dict = {
1313
'title': {
1414
'zh': '🚀SWIFT: 轻量级大模型训练推理框架',
15-
'en': '🚀SWIFT: Scalable lightWeight Infrastructure for Fine-Tuning'
15+
'en': '🚀SWIFT: Scalable lightWeight Infrastructure for Fine-Tuning and Inference'
1616
},
1717
'sub_title': {
1818
'zh':
1919
'请查看 <a href=\"https://github.com/modelscope/swift/tree/main/docs/source\" target=\"_blank\">'
2020
'SWIFT 文档</a>来查看更多功能',
2121
'en':
22-
'Please check <a href=\"https://github.com/modelscope/swift/tree/main/docs/source\" target=\"_blank\">'
22+
'Please check <a href=\"https://github.com/modelscope/swift/tree/main/docs/source_en\" target=\"_blank\">'
2323
'SWIFT Documentation</a> for more usages',
2424
},
25+
'star_beggar': {
26+
'zh':
27+
'喜欢<a href=\"https://github.com/modelscope/swift\" target=\"_blank\">SWIFT</a>就动动手指给我们加个star吧🥺 ',
28+
'en':
29+
'If you like <a href=\"https://github.com/modelscope/swift\" target=\"_blank\">SWIFT</a>, '
30+
'please take a few seconds to star us🥺 '
31+
},
2532
}
2633

2734

@@ -31,6 +38,7 @@ def run_ui():
3138
with gr.Blocks(title='SWIFT WebUI') as app:
3239
gr.HTML(f"<h1><center>{locale_dict['title'][lang]}</center></h1>")
3340
gr.HTML(f"<h3><center>{locale_dict['sub_title'][lang]}</center></h3>")
41+
gr.HTML(f"<h3><center>{locale_dict['star_beggar'][lang]}</center></h3>")
3442
with gr.Tabs():
3543
LLMTrain.build_ui(LLMTrain)
3644
LLMInfer.build_ui(LLMInfer)

swift/ui/llm_train/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ class Dataset(BaseUI):
3737
'en': 'Custom train dataset path'
3838
},
3939
'info': {
40-
'zh': '输入自定义的训练数据集路径,逗号分隔',
41-
'en': 'Extra train files, split by comma'
40+
'zh': '输入自定义的训练数据集路径,空格分隔',
41+
'en': 'Extra train files, split by blank'
4242
}
4343
},
4444
'custom_val_dataset_path': {

swift/ui/llm_train/quantization.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@ class Quantization(BaseUI):
1616
'en': 'Quantization'
1717
},
1818
},
19+
'quant_method': {
20+
'label': {
21+
'zh': '量化方式',
22+
'en': 'Quantization method'
23+
},
24+
'info': {
25+
'zh': '如果制定了量化位数,本参数默认为bnb',
26+
'en': 'Default is bnb if quantization_bit is specified'
27+
}
28+
},
1929
'quantization_bit': {
2030
'label': {
2131
'zh': '量化bit数',
@@ -51,6 +61,7 @@ def do_build_ui(cls, base_tab: Type['BaseUI']):
5161
with gr.Accordion(elem_id='quantization_tab', open=False):
5262
with gr.Row():
5363
gr.Dropdown(elem_id='quantization_bit')
64+
gr.Dropdown(elem_id='quant_method')
5465
gr.Dropdown(elem_id='bnb_4bit_comp_dtype')
5566
gr.Dropdown(elem_id='bnb_4bit_quant_type')
5667
gr.Checkbox(elem_id='bnb_4bit_use_double_quant')

0 commit comments

Comments (0)