
Commit 432c95e

Merge branch 'main' into release/2.2

2 parents: a513056 + ebfb636

34 files changed: +182 −79 lines

docs/source/AIGC/AnimateDiff微调推理文档.md

Lines changed: 1 addition & 1 deletion
@@ -155,7 +155,7 @@ seed: int = 42 # random seed

 lora_rank: int = 8 # lora parameter
 lora_alpha: int = 32 # lora parameter
-lora_dropout_p: float = 0.05 # lora parameter
+lora_dropout: float = 0.05 # lora parameter
 lora_dtype: str = 'fp32' # lora module dtype; if `AUTO`, it follows the dtype of the original module

 gradient_checkpointing: bool = False # whether to enable gradient checkpointing, disabled by default. Note: the current version of diffusers has an issue and does not support setting this parameter to True

docs/source/LLM/Agent微调最佳实践.md

Lines changed: 2 additions & 2 deletions
@@ -206,7 +206,7 @@ torchrun \
 --check_dataset_strategy warning \
 --lora_rank 8 \
 --lora_alpha 32 \
---lora_dropout_p 0.05 \
+--lora_dropout 0.05 \
 --lora_target_modules ALL \
 --self_cognition_sample 3000 \
 --model_name 卡卡罗特 \
@@ -494,7 +494,7 @@ torchrun \
 --check_dataset_strategy warning \
 --lora_rank 8 \
 --lora_alpha 32 \
---lora_dropout_p 0.05 \
+--lora_dropout 0.05 \
 --lora_target_modules ALL \
 --self_cognition_sample 3000 \
 --model_name 卡卡罗特 \

docs/source/LLM/DPO训练文档.md

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ swift rlhf \
 --check_dataset_strategy none \
 --lora_rank 8 \
 --lora_alpha 32 \
---lora_dropout_p 0.05 \
+--lora_dropout 0.05 \
 --lora_target_modules ALL \
 --gradient_checkpointing true \
 --batch_size 1 \

docs/source/LLM/Grok训练和推理.md

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ torchrun \
 --check_dataset_strategy warning \
 --lora_rank 8 \
 --lora_alpha 32 \
---lora_dropout_p 0.05 \
+--lora_dropout 0.05 \
 --lora_dtype AUTO \
 --lora_target_modules DEFAULT \
 --gradient_checkpointing true \

docs/source/LLM/LLM量化文档.md

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ pip install autoawq -U

 # Quantize with gptq:
 # auto_gptq versions must match your CUDA version; pick the right one following `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
-pip install auto_gptq -U
+pip install auto_gptq optimum -U

 # Quantize with bnb:
 pip install bitsandbytes -U
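The `optimum` package added above is what the gptq path relies on. As a rough sketch of how these installs are then used, a gptq quantization run might look like the following; the `swift export` flags and the model/dataset names are assumptions for illustration, not part of this commit:

```bash
# Hypothetical sketch: gptq-quantize a chat model after installing the deps above.
# Flags are assumed from swift's export documentation; adjust model/dataset to your setup.
pip install auto_gptq optimum -U

swift export \
    --model_type qwen1half-7b-chat \
    --quant_bits 4 \
    --quant_method gptq \
    --dataset alpaca-zh
```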

docs/source/LLM/命令行参数.md

Lines changed: 13 additions & 4 deletions
@@ -3,7 +3,8 @@
 ## Table of Contents

 - [sft parameters](#sft-参数)
-- [dpo parameters](#dpo-参数)
+- [pt parameters](#pt-参数)
+- [rlhf parameters](#rlhf-参数)
 - [infer merge-lora parameters](#infer-merge-lora-参数)
 - [export parameters](#export-参数)
 - [eval parameters](#eval参数)
@@ -65,7 +66,7 @@
 - `--lora_target_regex`: regex expression specifying the lora modules, of type `Optional[str]`. Default is `None`; if a value is passed, `lora_target_modules` has no effect.
 - `--lora_rank`: default is `8`. Only takes effect when `sft_type` is 'lora'.
 - `--lora_alpha`: default is `32`. Only takes effect when `sft_type` is 'lora'.
-- `--lora_dropout_p`: default is `0.05`; only takes effect when `sft_type` is 'lora'.
+- `--lora_dropout`: default is `0.05`; only takes effect when `sft_type` is 'lora'.
 - `--init_lora_weights`: method for initializing LoRA weights; can be `true`, `false`, `guassian`, `pissa`, `pissa_niter_[number of iters]`. Default is `true`.
 - `--lora_bias_trainable`: default is `'none'`; possible values: 'none', 'all'. If you want all biases to be trainable, set it to `'all'`.
 - `--lora_modules_to_save`: default is `[]`. If you want to train embedding, lm_head, or layer_norm, set this parameter, e.g. `--lora_modules_to_save EMBEDDING LN lm_head`. If `'EMBEDDING'` is passed, the Embedding layer is added to `lora_modules_to_save`; if `'LN'` is passed, `RMSNorm` and `LayerNorm` are added to `lora_modules_to_save`.
@@ -82,12 +83,12 @@
 - `--max_steps`: max_steps for training, default `-1`. If `max_steps >= 0`, it overrides `num_train_epochs`.
 - `--optim`: default is `'adamw_torch'`.
 - `--adam_beta1`: default is `0.9`.
-- `--adam_beta2`: default is `0.999`.
+- `--adam_beta2`: default is `0.95`.
 - `--adam_epsilon`: default is `1e-8`.
 - `--learning_rate`: default is `None`, i.e. set to 1e-4 if `sft_type` is lora and 1e-5 if `sft_type` is full.
 - `--weight_decay`: default is `0.1`.
 - `--gradient_accumulation_steps`: gradient accumulation, default `None`, in which case it is set to `math.ceil(16 / self.batch_size / world_size)`. `total_batch_size = batch_size * gradient_accumulation_steps * world_size`.
-- `--max_grad_norm`: gradient clipping, default `0.5`.
+- `--max_grad_norm`: gradient clipping, default `1`.
 - `--predict_with_generate`: whether to evaluate generatively, default `False`. If False, evaluation uses `loss`; if True, evaluation uses metrics such as `ROUGE-L`. Generative evaluation is very time-consuming, so choose carefully.
 - `--lr_scheduler_type`: default is `'cosine'`; options include 'linear', 'cosine', 'constant', etc.
 - `--warmup_ratio`: proportion of total training steps used for warmup, default `0.05`.
@@ -235,6 +236,14 @@ unsloth adds no new parameters; adjusting the existing parameters is sufficient:
 - `--ia3_feedforward_modules`: specifies the Linear names of IA3's MLP; these names must be in `ia3_target_modules`.
 - `--ia3_modules_to_save`: additional modules that participate in IA3 training; see `lora_modules_to_save` for the meaning.

+## PT Parameters
+
+PT parameters inherit from the sft parameters and modify some default values.
+- `--sft_type`: default is `'full'`.
+- `--lora_target_modules`: default is `'ALL'`.
+- `--lazy_tokenize`: default is `True`.
+- `--eval_steps`: default is `500`.
+
 ## RLHF Parameters

 RLHF parameters inherit from the sft parameters, and add the following parameters:
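To make the `--lora_dropout_p` → `--lora_dropout` rename concrete, a minimal LoRA fine-tuning call might now look like the sketch below; only the LoRA flags come from this commit, while the model and dataset values are placeholders:

```bash
# Illustrative sketch only: the LoRA flags mirror the renamed/updated parameters above;
# the --model_type and --dataset values are placeholders, not taken from this commit.
swift sft \
    --model_type qwen1half-7b-chat \
    --dataset alpaca-zh \
    --sft_type lora \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
    --lora_target_modules ALL
```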

docs/source/LLM/自定义与拓展.md

Lines changed: 8 additions & 5 deletions
@@ -123,27 +123,30 @@ system,instruction,input,output
 {"system": "123", "query": "AAAAA", "response": "BBBBB", "rejected_response": "CCCCC", "history": [["query1", "response1"], ["query2", "response2"]]}
 ```

-`system` and `history` are optional
+- `system` and `history` are optional

 Language model (KTO)
 ```jsonl
 {"query": "11111", "response": "22222", "label": true}
 {"query": "aaaaa", "response": "bbbbb", "label": false}
 {"system": "123", "query": "AAAAA", "response": "BBBBB", "label": true, "history": [["query1", "response1"], ["query2", "response2"]]}
 ```
-Note that `label` must be a bool, not a string
+- Note that `label` must be a bool, not a string

-`system` and `history` are optional
+- `system` and `history` are optional


-Vision multimodal large models: different models support different numbers of images; see the best-practice document for the corresponding model (DPO/ORPO/SimPO/CPO)
+Vision multimodal large models (DPO/ORPO/SimPO/CPO)
+
 ```jsonl
 {"system": "123", "query": "11111", "response": "22222", "rejected_response": "33333", "images": ["image_path"], "history": [["query1", "response1"], ["query2", "response2"]]}
 {"system": "123", "query": "aaaaa", "response": "bbbbb", "rejected_response": "ccccc", "images": ["image_path"], "history": [["query1", "response1"], ["query2", "response2"]]}
 {"system": "123", "query": "AAAAA", "response": "BBBBB", "rejected_response": "CCCCC", "images": ["image_path"], "history": [["query1", "response1"], ["query2", "response2"]]}
 ```

-`system` and `history` are optional
+- Different models support different numbers of images; see the best-practice document for the corresponding model
+
+- `system` and `history` are optional

 **Tool-Calling Agent**
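As a quick way to sanity-check the KTO format described above (in particular that `label` is a JSON boolean rather than a string), one could assemble a tiny dataset like this sketch; the file name is arbitrary:

```bash
# Write a minimal KTO-style jsonl file; note the unquoted true/false labels.
cat > kto_demo.jsonl <<'EOF'
{"query": "11111", "response": "22222", "label": true}
{"query": "aaaaa", "response": "bbbbb", "label": false}
EOF

# Confirm every label parses as a JSON bool (prints <class 'bool'> per line).
python -c "import json; [print(type(json.loads(l)['label'])) for l in open('kto_demo.jsonl')]"
```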

docs/source_en/AIGC/AnimateDiff-train-infer.md

Lines changed: 1 addition & 1 deletion
@@ -150,7 +150,7 @@ seed: int = 42 # Random seed.

 lora_rank: int = 8 # lora parameter.
 lora_alpha: int = 32 # lora parameter.
-lora_dropout_p: float = 0.05 # lora parameter.
+lora_dropout: float = 0.05 # lora parameter.
 lora_dtype: str = 'fp32' # lora module dtype type. If `AUTO`, it follows the dtype setting of the original module.

 gradient_checkpointing: bool = False # Whether to enable gc, disabled by default. Note: The current version of diffusers has a problem and does not support this parameter being True.

docs/source_en/LLM/Agent-fine-tuning-best-practice.md

Lines changed: 2 additions & 2 deletions
@@ -206,7 +206,7 @@ torchrun \
 --check_dataset_strategy warning \
 --lora_rank 8 \
 --lora_alpha 32 \
---lora_dropout_p 0.05 \
+--lora_dropout 0.05 \
 --lora_target_modules ALL \
 --self_cognition_sample 3000 \
 --model_name 卡卡罗特 \
@@ -486,7 +486,7 @@ torchrun \
 --check_dataset_strategy warning \
 --lora_rank 8 \
 --lora_alpha 32 \
---lora_dropout_p 0.05 \
+--lora_dropout 0.05 \
 --lora_target_modules ALL \
 --self_cognition_sample 3000 \
 --model_name 卡卡罗特 \

docs/source_en/LLM/Command-line-parameters.md

Lines changed: 14 additions & 4 deletions
@@ -3,7 +3,8 @@
 ## Table of Contents

 - [sft Parameters](#sft-parameters)
-- [dpo Parameters](#dpo-parameters)
+- [pt Parameters](#pt-parameters)
+- [rlhf Parameters](#rlhf-parameters)
 - [infer merge-lora Parameters](#infer-merge-lora-parameters)
 - [export Parameters](#export-parameters)
 - [eval Parameters](#eval-parameters)
@@ -66,7 +67,7 @@
 - `--lora_target_regex`: The lora target regex in `Optional[str]`. Default is `None`. If this argument is specified, `lora_target_modules` will have no effect.
 - `--lora_rank`: Default is `8`. Only takes effect when `sft_type` is 'lora'.
 - `--lora_alpha`: Default is `32`. Only takes effect when `sft_type` is 'lora'.
-- `--lora_dropout_p`: Default is `0.05`, only takes effect when `sft_type` is 'lora'.
+- `--lora_dropout`: Default is `0.05`, only takes effect when `sft_type` is 'lora'.
 - `--init_lora_weights`: Method to initialize LoRA weights, can be specified as `true`, `false`, `gaussian`, `pissa`, or `pissa_niter_[number of iters]`. Default value `true`.
 - `--lora_bias_trainable`: Default is `'none'`, options: 'none', 'all'. Set to `'all'` to make all biases trainable.
 - `--lora_modules_to_save`: Default is `[]`. If you want to train embedding, lm_head, or layer_norm, you can set this parameter, e.g. `--lora_modules_to_save EMBEDDING LN lm_head`. If passed `'EMBEDDING'`, the Embedding layer will be added to `lora_modules_to_save`. If passed `'LN'`, `RMSNorm` and `LayerNorm` will be added to `lora_modules_to_save`.
@@ -83,12 +84,12 @@
 - `--max_steps`: Max_steps for training, default is `-1`. If `max_steps >= 0`, this overrides `num_train_epochs`.
 - `--optim`: Default is `'adamw_torch'`.
 - `--adam_beta1`: Default is `0.9`.
-- `--adam_beta2`: Default is `0.999`.
+- `--adam_beta2`: Default is `0.95`.
 - `--adam_epsilon`: Default is `1e-8`.
 - `--learning_rate`: Default is `None`, i.e. set to 1e-4 if `sft_type` is lora, set to 1e-5 if `sft_type` is full.
 - `--weight_decay`: Default is `0.01`.
 - `--gradient_accumulation_steps`: Gradient accumulation, default is `None`, set to `math.ceil(16 / self.batch_size / world_size)`. `total_batch_size = batch_size * gradient_accumulation_steps * world_size`.
-- `--max_grad_norm`: Gradient clipping, default is `0.5`.
+- `--max_grad_norm`: Gradient clipping, default is `1`.
 - `--predict_with_generate`: Whether to use generation for evaluation, default is `False`. If set to False, evaluate using `loss`. If set to True, evaluate using `ROUGE-L` and other metrics. Generative evaluation takes a long time, choose carefully.
 - `--lr_scheduler_type`: Default is `'cosine'`, options: 'linear', 'cosine', 'constant', etc.
 - `--warmup_ratio`: Proportion of warmup in total training steps, default is `0.05`.
@@ -237,6 +238,15 @@ The following parameters take effect when `sft_type` is set to `ia3`.
 - `--ia3_feedforward_modules`: Specify the Linear name of IA3's MLP, this name must be in `ia3_target_modules`.
 - `--ia3_modules_to_save`: Additional modules participating in IA3 training. See meaning of `lora_modules_to_save`.

+## PT Parameters
+
+PT parameters inherit from the SFT parameters with some modifications to the default values:
+
+- `--sft_type`: Default value is `'full'`.
+- `--lora_target_modules`: Default value is `'ALL'`.
+- `--lazy_tokenize`: Default value is `True`.
+- `--eval_steps`: Default value is `500`.
+
 ## RLHF Parameters
 RLHF parameters are an extension of the sft parameters, with the addition of the following options:
 - `--rlhf_type`: Choose the alignment algorithm, with options such as 'dpo', 'orpo', 'simpo', 'kto', 'cpo'. For training scripts with different algorithms, please refer to [document](./Human-Preference-Alignment-Training-Documentation.md)
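For readers unfamiliar with the newly documented PT section, a minimal pre-training style run might look like the sketch below; the `swift pt` subcommand and all values here are assumptions for illustration, and only the defaults listed above come from this commit:

```bash
# Hypothetical sketch: a PT run relying on the defaults documented above
# (sft_type=full, lora_target_modules=ALL, lazy_tokenize=True, eval_steps=500).
# The subcommand name and the --model_type/--dataset values are assumptions.
swift pt \
    --model_type qwen1half-7b \
    --dataset my-pretrain-corpus
```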
