Commit d5f0a4d
Support max memory args (#1382)
1 parent 6c963d8 commit d5f0a4d

File tree

6 files changed: +29 additions, -4 deletions


docs/source/LLM/命令行参数.md

Lines changed: 4 additions & 2 deletions

@@ -134,7 +134,8 @@
 - Matching rules are applied with the following priority, from highest to lowest: query field > response-specific fields > regular-expression matching rules.
 - `--custom_register_path`: Default is `None`. Pass in a `.py` file used to register templates, models, and datasets.
 - `--custom_dataset_info`: Default is `None`. Pass in the path to an external dataset_info.json, a JSON string, or a dict, used to extend datasets. Format reference: https://github.com/modelscope/swift/blob/main/swift/llm/data/dataset_info.json
-- `--device_map_config_path`: Manually configure the model's device_map from a local file, default is None
+- `--device_map_config_path`: Manually configure the model's device_map from a local file, default is None.
+- `--device_max_memory`: The maximum memory each device may use for device_map, a `List`, default `[]`. The number of values passed must equal the number of visible GPUs, e.g. `10GB 10GB`.

 ### Long Context

@@ -252,7 +253,8 @@ RLHF parameters inherit the sft parameters, with the following additions:
 - `--load_args_from_ckpt_dir`: Whether to read model configuration from the `sft_args.json` file in `ckpt_dir`. Default is `True`.
 - `--load_dataset_config`: Takes effect only when `--load_args_from_ckpt_dir true`, i.e. whether to read dataset-related configuration from the `sft_args.json` file in `ckpt_dir`. Default is `False`.
 - `--eval_human`: Whether to evaluate with the validation split of the dataset or by manual input. Default is `None` for automatic selection: if no dataset (including custom datasets) is passed, manual evaluation is used; if a dataset is passed, dataset evaluation is used.
-- `--device_map_config_path`: Manually configure the model's device_map from a local file, default is None
+- `--device_map_config_path`: Manually configure the model's device_map from a local file, default is None.
+- `--device_max_memory`: The maximum memory each device may use for device_map, a `List`, default `[]`. The number of values passed must equal the number of visible GPUs, e.g. `10GB 10GB`.
 - `--seed`: Default is `42`; see `sft.sh command line arguments` for details.
 - `--dtype`: Default is `'AUTO'`; see `sft.sh command line arguments` for details.
 - `--dataset`: Default is `[]`; see `sft.sh command line arguments` for details.

docs/source_en/LLM/Command-line-parameters.md

Lines changed: 4 additions & 2 deletions

@@ -135,7 +135,8 @@
 - The application priority of matching rules is as follows, from highest to lowest: query fields > specific response fields > regular expression matching rules.
 - `--custom_register_path`: Default is `None`. Pass in a `.py` file used to register templates, models, and datasets.
 - `--custom_dataset_info`: Default is `None`. Pass in the path to an external `dataset_info.json`, a JSON string, or a dictionary. Used to register custom datasets. Format example: https://github.com/modelscope/swift/blob/main/swift/llm/data/dataset_info.json
-- `device_map_config_path`: Manually configure the model's device map from a local file, defaults to None.
+- `--device_map_config_path`: Manually configure the model's device map from a local file, defaults to None.
+- `--device_max_memory`: The maximum memory each device may use for `device_map`, a `List`, default `[]`. The number of values must equal the visible device count, e.g. `10GB 10GB`.

 ### Long Context

@@ -253,7 +254,8 @@ RLHF parameters are an extension of the sft parameters, with the addition of the
 - `--load_args_from_ckpt_dir`: Whether to read model configuration info from the `sft_args.json` file in `ckpt_dir`. Default is `True`.
 - `--load_dataset_config`: This parameter only takes effect when `--load_args_from_ckpt_dir true`, i.e. whether to read dataset-related configuration from the `sft_args.json` file in `ckpt_dir`. Default is `False`.
 - `--eval_human`: Whether to evaluate using the validation split of the dataset or manual evaluation. Default is `None` for automatic selection: if no datasets (including custom datasets) are passed, manual evaluation is used; if datasets are passed, dataset evaluation is used.
-- `device_map_config_path`: Manually configure the model's device map from a local file, defaults to None.
+- `--device_map_config_path`: Manually configure the model's device map from a local file, defaults to None.
+- `--device_max_memory`: The maximum memory each device may use for `device_map`, a `List`, default `[]`. The number of values must equal the visible device count, e.g. `10GB 10GB`.
 - `--seed`: Default is `42`; see `sft.sh command line arguments` for details.
 - `--dtype`: Default is `'AUTO'`; see `sft.sh command line arguments` for details.
 - `--dataset`: Default is `[]`; see `sft.sh command line arguments` for details.
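The documented values are human-readable sizes like `10GB`. Downstream, transformers/accelerate consume such strings as-is, so no conversion is needed in the commit; purely for illustration, a hypothetical converter (not part of this change) makes explicit what a value denotes in bytes, including the decimal-GB vs binary-GiB distinction:

```python
import re

# Decimal (KB/MB/GB/TB) and binary (KiB/MiB/GiB/TiB) multipliers.
_UNITS = {'KB': 10**3, 'MB': 10**6, 'GB': 10**9, 'TB': 10**12,
          'KIB': 2**10, 'MIB': 2**20, 'GIB': 2**30, 'TIB': 2**40}

def to_bytes(size: str) -> int:
    """Illustrative parser for sizes like '10GB' or '1GiB' (hypothetical helper)."""
    m = re.fullmatch(r'(\d+)\s*([KMGT]I?B)', size.strip(), re.IGNORECASE)
    if m is None:
        raise ValueError(f'unrecognized size: {size!r}')
    return int(m.group(1)) * _UNITS[m.group(2).upper()]

print(to_bytes('10GB'))   # 10000000000
print(to_bytes('1GiB'))   # 1073741824
```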

swift/llm/infer.py

Lines changed: 3 additions & 0 deletions

@@ -146,6 +146,9 @@ def prepare_model_template(args: InferArguments,
     if device_map == 'auto':
         model_kwargs['low_cpu_mem_usage'] = True
     model_kwargs['device_map'] = device_map
+    if args.device_max_memory:
+        assert len(args.device_max_memory) == torch.cuda.device_count()
+        model_kwargs['max_memory'] = {i: mem for i, mem in enumerate(args.device_max_memory)}

     # Loading Model and Tokenizer
     if hasattr(args, 'quant_config'):
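In the single-process inference path the mapping is purely positional: value `i` caps device `i`. A minimal sketch of the dict the infer.py hunk builds (the helper name and validation message are ours, not the repo's):

```python
def build_max_memory(device_max_memory, device_count):
    """Pair each memory cap with its device index, as infer.py does via enumerate."""
    if not device_max_memory:
        return None  # empty list is falsy, so the feature is opt-in
    if len(device_max_memory) != device_count:
        raise ValueError('number of values must equal the visible device count')
    return {i: mem for i, mem in enumerate(device_max_memory)}

print(build_max_memory(['10GB', '10GB'], 2))  # {0: '10GB', 1: '10GB'}
```

The resulting dict is the shape `from_pretrained` accepts as `max_memory` alongside `device_map='auto'`.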

swift/llm/rlhf.py

Lines changed: 8 additions & 0 deletions

@@ -61,6 +61,14 @@ def llm_rlhf(args: RLHFArguments) -> Dict[str, Any]:
     else:
         model_kwargs['device_map'] = 'auto'

+    if args.device_max_memory:
+        n_gpu = torch.cuda.device_count()
+        assert len(args.device_max_memory) == n_gpu / local_world_size
+        model_kwargs['max_memory'] = {
+            i: mem
+            for i, mem in zip(list(range(local_rank, n_gpu, local_world_size)), args.device_max_memory)
+        }
+
     # quantization
     if args.quant_method == 'hqq':
         from transformers import HqqConfig
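In the multi-process training paths, each local rank owns every `local_world_size`-th GPU starting at its own `local_rank`, and the hunk zips those indices with the user-supplied caps. A sketch of that per-rank slicing (function name is ours; the GPU/rank counts below are illustrative):

```python
def per_rank_max_memory(device_max_memory, n_gpu, local_rank, local_world_size):
    """Assign each cap to the devices this rank manages, mirroring the rlhf.py hunk."""
    return {
        i: mem
        for i, mem in zip(range(local_rank, n_gpu, local_world_size), device_max_memory)
    }

# 4 GPUs, 2 local processes, 2 caps per rank:
print(per_rank_max_memory(['10GB', '10GB'], 4, 0, 2))  # {0: '10GB', 2: '10GB'}
print(per_rank_max_memory(['10GB', '10GB'], 4, 1, 2))  # {1: '10GB', 3: '10GB'}
```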

swift/llm/sft.py

Lines changed: 8 additions & 0 deletions

@@ -60,6 +60,14 @@ def llm_sft(args: SftArguments) -> Dict[str, Union[str, Any]]:
     elif not use_torchacc():
         model_kwargs['device_map'] = 'auto'

+    if args.device_max_memory:
+        n_gpu = torch.cuda.device_count()
+        assert len(args.device_max_memory) == n_gpu / local_world_size
+        model_kwargs['max_memory'] = {
+            i: mem
+            for i, mem in zip(list(range(local_rank, n_gpu, local_world_size)), args.device_max_memory)
+        }
+
     if args.quant_method == 'hqq':
         from transformers import HqqConfig
         if args.hqq_dynamic_config_path is not None:
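One detail worth noting in the sft.py and rlhf.py asserts: `n_gpu / local_world_size` is true division, so an int length is compared against a float. The check still behaves as intended because Python compares int and float by numeric value; a tiny sketch with made-up counts:

```python
def caps_match(num_caps, n_gpu, local_world_size):
    """Mirror the assert's int-vs-float comparison from the diff."""
    return num_caps == n_gpu / local_world_size

print(caps_match(2, 4, 2))  # True: 2 == 2.0
print(caps_match(2, 8, 2))  # False: 8 GPUs over 2 local ranks needs 4 caps
```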

swift/llm/utils/argument.py

Lines changed: 2 additions & 0 deletions

@@ -635,6 +635,7 @@ class SftArguments(ArgumentsBase):
     custom_dataset_info: Optional[str] = None  # .json

     device_map_config_path: Optional[str] = None
+    device_max_memory: List[str] = field(default_factory=list)

     # generation config
     max_new_tokens: int = 2048

@@ -1134,6 +1135,7 @@ class InferArguments(ArgumentsBase):
     custom_register_path: Optional[str] = None  # .py
     custom_dataset_info: Optional[str] = None  # .json
     device_map_config_path: Optional[str] = None
+    device_max_memory: List[str] = field(default_factory=list)

     # vllm
     gpu_memory_utilization: float = 0.9
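The new fields use `field(default_factory=list)` rather than `= []` because dataclasses reject mutable defaults, and a factory gives each instance its own list. A minimal sketch with a stand-in class (not the real `SftArguments`/`InferArguments`):

```python
from dataclasses import dataclass, field
from typing import List

@dataclass
class Args:  # stand-in for the argument dataclasses in the diff
    device_max_memory: List[str] = field(default_factory=list)

a, b = Args(), Args()
a.device_max_memory.append('10GB')
print(b.device_max_memory)  # [] -- instances do not share the default list
```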
