|
| 1 | +# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | + |
| 16 | +from dataclasses import dataclass, field |
| 17 | +from typing import Optional |
| 18 | + |
| 19 | +from paddlenlp.trainer import TrainingArguments |
| 20 | +from paddlenlp.trainer.trainer_utils import IntervalStrategy |
| 21 | +from paddlenlp.trainer.utils.doc import add_start_docstrings |
| 22 | +from paddlenlp.transformers.configuration_utils import llmmetaclass |
| 23 | + |
| 24 | + |
@dataclass
@llmmetaclass
@add_start_docstrings(TrainingArguments.__doc__)
class KTOTrainingArguments(TrainingArguments):
    """KTOTrainingArguments

    Extends ``TrainingArguments`` with checkpoint-unification settings and two
    benchmark switches (``autotuner_benchmark`` and ``benchmark``) whose
    overrides are applied in ``__post_init__``.
    """

    # Enabled by default in this class.
    unified_checkpoint: bool = field(
        default=True,
        metadata={"help": "Whether to unify hybrid parallel checkpoint."},
    )
    unified_checkpoint_config: Optional[str] = field(
        default="",
        metadata={"help": "Configs to unify hybrid parallel checkpoint.\n"},
    )
    # Short, fixed-length run: forces 1 epoch / 5 steps (see __post_init__).
    autotuner_benchmark: bool = field(
        default=False,
        metadata={"help": "Whether to run benchmark by autotuner. True for from_scratch."},
    )
    # Train-only run that keeps the user-supplied max_steps.
    benchmark: bool = field(
        default=False,
        metadata={
            "help": "Whether to run in benchmark mode (train only; no eval, predict, export or checkpoint saving)."
        },
    )

    def __post_init__(self):
        """Run the base initialization, then apply benchmark overrides.

        ``autotuner_benchmark`` is handled first: it pins the run to one epoch
        and five steps, clears ``report_to`` and applies the shared benchmark
        overrides. ``benchmark`` applies the same shared overrides but keeps
        the configured ``max_steps``; it only forces ``num_train_epochs`` to 1
        when ``max_steps`` is positive.
        """
        super().__post_init__()
        if self.autotuner_benchmark:
            self.num_train_epochs = 1
            self.max_steps = 5
            self.report_to = []
            self._apply_benchmark_overrides()
        if self.benchmark:
            self._apply_benchmark_overrides()
            if self.max_steps > 0:
                self.num_train_epochs = 1

    def _apply_benchmark_overrides(self):
        """Apply the settings shared by both benchmark modes."""
        # Train only: no export, prediction or evaluation.
        self.do_train = True
        self.do_export = False
        self.do_predict = False
        self.do_eval = False
        self.overwrite_output_dir = True
        self.load_best_model_at_end = False
        # Neither checkpoints nor evaluation are scheduled.
        self.save_strategy = IntervalStrategy.NO
        self.evaluation_strategy = IntervalStrategy.NO
        # Log every step unless tqdm output has been disabled.
        if not self.disable_tqdm:
            self.logging_steps = 1
            self.logging_strategy = IntervalStrategy.STEPS
| 79 | + |
| 80 | + |
@dataclass
class KTOConfig:
    """Hyper-parameters for the KTO loss plus the LoRA switch."""

    # The beta parameter of the KTO loss.
    beta: float = field(
        default=0.1,
        metadata={"help": "the beta parameter for KTO loss"},
    )
    # NOTE(review): presumably the loss weights for desirable / undesirable
    # samples -- confirm against the trainer that consumes this config.
    desirable_weight: float = field(
        default=1.0,
        metadata={"help": "desirable_weight"},
    )
    undesirable_weight: float = field(
        default=1.0,
        metadata={"help": "undesirable_weight"},
    )
    # Whether a LoRA model is used.
    lora: bool = field(
        default=False,
        metadata={"help": "Use LoRA model."},
    )
| 89 | + |
| 90 | + |
@dataclass
class KTODataArgument:
    """Dataset locations and sequence-length limits.

    NOTE(review): the help strings call the dataset paths "dir" while the
    defaults point at ``.jsonl`` files -- confirm which one the loader expects.
    """

    train_dataset_path: str = field(
        default="./data/train.jsonl",
        metadata={"help": "Path to the train dataset dir."},
    )
    dev_dataset_path: str = field(
        default="./data/dev.jsonl",
        metadata={"help": "Path to the dev dataset dir."},
    )
    # Length limits for the full sequence and for the prompt part.
    max_seq_len: int = field(
        default=4096,
        metadata={"help": "Maximum sequence length."},
    )
    max_prompt_len: int = field(
        default=2048,
        metadata={"help": "Maximum prompt length."},
    )
    greedy_zero_padding: bool = field(
        default=False, metadata={"help": "Whether to use Greedy Zero Padding data stream."}
    )
| 103 | + |
| 104 | + |
@dataclass
class KTOModelArgument:
    """Model, tokenizer, quantization, attention-fusion and LoRA options.

    NOTE(review): several fields are annotated ``str`` / ``bool`` yet default
    to ``None``. The annotations are kept unchanged here because argument
    parsers may derive their behavior from them -- confirm before tightening.
    """

    model_name_or_path: str = field(
        default=None,
        metadata={"help": "Pretrained model name or path to local directory."},
    )
    tokenizer_name_or_path: Optional[str] = field(
        default=None,
        metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"},
    )
    flash_mask: bool = field(
        default=False,
        metadata={"help": "Whether to use flash mask in flash attention."},
    )
    weight_quantize_algo: str = field(
        default=None,
        metadata={"help": "Model weight quantization algorithm including 'nf4'(qlora), 'weight_only_int8'."},
    )
    # Fusion switches default to None rather than False.
    fuse_attention_qkv: bool = field(default=None, metadata={"help": "whether to fuse attention qkv"})
    fuse_attention_ffn: bool = field(
        default=None, metadata={"help": "whether to fuse first up and gate proj in mlp block"}
    )

    # ---- LoRA options ----
    lora_rank: int = field(
        default=8,
        metadata={"help": "Lora rank."},
    )
    lora_path: str = field(
        default=None,
        metadata={"help": "Initialize lora state dict."},
    )
    rslora: bool = field(
        default=False,
        metadata={"help": "Whether to use RsLoRA"},
    )
    lora_plus_scale: float = field(
        default=1.0,
        metadata={"help": "Lora B scale in LoRA+ technique"},
    )
    lora_alpha: int = field(
        default=-1,
        metadata={"help": "lora_alpha"},
    )
    rslora_plus: bool = field(
        default=False,
        metadata={"help": "Strengthen lora performance"},
    )
    use_quick_lora: bool = field(
        default=True,
        metadata={"help": "quick lora"},
    )
0 commit comments