
Commit de6baa7

Added some fields under custom dataclass to let it pass through HfArgumentParser
Signed-off-by: romit <[email protected]>
1 parent 80b866c commit de6baa7

File tree

3 files changed: +81 -5 lines changed


tuning/config/peft_config.py

Lines changed: 79 additions & 1 deletion
@@ -15,7 +15,7 @@
 # Standard
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import List
+from typing import List, Optional
 
 # Third Party
 from peft import LoraConfig as _LoraConfig
@@ -51,9 +51,87 @@ class LoraConfig(_LoraConfig):
         lora_dropout (`float`):
             The dropout probability for Lora layers.
     """
+
     lora_alpha: int = 32
     lora_dropout: float = 0.05
 
+    # HACK: The following arguments are listed here
+    # as a temporary fix which narrows the field annotations
+    # (e.g. from Optional[Union[List[str], str]] to Optional[List[str]]) so they pass through HfArgumentParser.
+    # Please see: https://github.com/huggingface/transformers/issues/40915 for further explanation!
+    target_modules: Optional[List[str]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "List of module names or regex expression of the module names to replace with LoRA. "
+                "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. "
+                "This can also be a wildcard 'all-linear' which matches all linear/Conv1D "
+                "(if the model is a PreTrainedModel, the output layer excluded). "
+                "If not specified, modules will be chosen according to the model architecture. If the architecture is "
+                "not known, an error will be raised -- in this case, you should specify the target modules manually. "
+                "To avoid targeting any modules (because you want to apply `target_parameters`), set "
+                "`target_modules=[]`."
+            ),
+        },
+    )
+    exclude_modules: List[str] | None = field(
+        default=None,
+        metadata={
+            "help": "List of module names or regex expression of the module names to exclude from Lora."
+        },
+    )
+    init_lora_weights: bool = field(
+        default=True,
+        metadata={
+            "help": (
+                "How to initialize the weights of the LoRA layers. "
+                "Passing True (default) results in the default initialization from the reference implementation from "
+                "Microsoft, with the LoRA B weight being set to 0. This means that without further training, the LoRA "
+                "adapter will be a no-op. "
+                "Setting the initialization to False leads to random initialization of LoRA A and B, meaning that LoRA "
+                "is not a no-op before training; this setting is intended for debugging purposes."
+            ),
+        },
+    )
+    layers_to_transform: Optional[list[int]] = field(
+        default=None,
+        metadata={
+            "help": "The layer indexes to transform. If this argument is specified, PEFT will transform only the layer indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index. "
+            "This only works when target_modules is a list of str."
+        },
+    )
+    layers_pattern: Optional[list[str]] = field(
+        default=None,
+        metadata={
+            "help": "The layer pattern name, used only if `layers_to_transform` is different from None and if the layer pattern is not in the common layers pattern. "
+            "This only works when target_modules is a list of str. This should target the `nn.ModuleList` of the "
+            "model, which is often called `'layers'` or `'h'`."
+        },
+    )
+    trainable_token_indices: Optional[list[int]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "Lets you specify which token indices to selectively fine-tune without requiring to re-train the "
+                "whole embedding matrix using the `peft.TrainableTokensModel` method. You can specify token indices "
+                "in two ways. Either you specify a list of indices which will then target the model's input embedding "
+                "layer (or, if not found, `embed_tokens`). (Not supported yet) Alternatively, you can specify a dictionary where the key "
+                "is the name of the embedding module and the values are the list of token indices, e.g. "
+                "`{'embed_tokens': [0, 1, ...]}`. Note that training with FSDP requires `use_orig_params=True` to "
+                "avoid issues with non-uniform `requires_grad`."
+            )
+        },
+    )
+    loftq_config: dict = field(
+        default_factory=dict,
+        metadata={
+            "help": (
+                "The configuration of LoftQ. If this is passed, then LoftQ will be used to quantize the backbone "
+                "weights and initialize Lora layers. Also set `init_lora_weights='loftq'` in this case."
+            )
+        },
+    )
+
     def __post_init__(self):
         # If target_modules is a single-element list, convert it into a plain string
         if self.target_modules == ["all-linear"]:
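
For context on the HACK above, here is a minimal, hypothetical sketch (not part of this commit) of how a narrowed annotation such as Optional[List[str]] is expected to pass through HfArgumentParser; the TinyLoraArgs dataclass and the CLI flags below are made up for illustration:

from dataclasses import dataclass, field
from typing import List, Optional

from transformers import HfArgumentParser


@dataclass
class TinyLoraArgs:
    # A Union annotation such as Optional[Union[List[str], str]] is what the
    # linked issue reports tripping up HfArgumentParser; Optional[List[str]]
    # parses cleanly as a repeated string argument.
    target_modules: Optional[List[str]] = field(default=None)
    lora_alpha: int = 32
    lora_dropout: float = 0.05


parser = HfArgumentParser(TinyLoraArgs)
(args,) = parser.parse_args_into_dataclasses(
    ["--target_modules", "q_proj", "v_proj", "--lora_alpha", "16"]
)
print(args.target_modules, args.lora_alpha, args.lora_dropout)
# expected: ['q_proj', 'v_proj'] 16 0.05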

tuning/sft_trainer.py

Lines changed: 1 addition & 3 deletions
@@ -853,9 +853,7 @@ def main():
         )
         sys.exit(INTERNAL_ERROR_EXIT_CODE)
 
-    if isinstance(
-        tune_config, LoraConfig
-    ):  # aLoraConfig subclasses LoraConfig
+    if isinstance(tune_config, LoraConfig):  # aLoraConfig subclasses LoraConfig
         try:
             if training_args.save_model_dir:
                 # Write number of added tokens to artifacts
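
A tiny aside on the comment in this hunk, using hypothetical stand-in classes: a single isinstance check suffices because isinstance() also matches subclasses, which is why aLoraConfig needs no separate branch:

class LoraConfig: ...  # stand-in for the tuning LoraConfig dataclass


class aLoraConfig(LoraConfig): ...  # stand-in for the aLora subclass


tune_config = aLoraConfig()
print(isinstance(tune_config, LoraConfig))  # True, so one branch covers both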

tuning/utils/config_utils.py

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ def get_hf_peft_config(task_type, tuning_config, tokenizer_name_or_path):
         lora_config = asdict(tuning_config)
 
         if not hasattr(lora_config, "task_type"):
-            lora_config["task_type"]=task_type
+            lora_config["task_type"] = task_type
         hf_peft_config = HFLoraConfig(**lora_config)
     elif isinstance(tuning_config, peft_config.PromptTuningConfig):
         hf_peft_config = HFPromptTuningConfig(
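
A short sketch of the path this hunk sits on, assuming (as peft's documented API allows for the fields shown) that the Hugging Face LoraConfig accepts these keyword arguments: the tuning dataclass is flattened with asdict and splatted into HFLoraConfig, which is why every field added in tuning/config/peft_config.py must be a valid HFLoraConfig keyword. TinyTuningLora below is a trimmed-down, hypothetical stand-in for the tuning LoraConfig.

from dataclasses import asdict, dataclass
from typing import List, Optional

from peft import LoraConfig as HFLoraConfig


@dataclass
class TinyTuningLora:  # hypothetical stand-in for tuning.config.peft_config.LoraConfig
    r: int = 8
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    target_modules: Optional[List[str]] = None


tuning_cfg = TinyTuningLora(target_modules=["q_proj", "v_proj"])
lora_kwargs = asdict(tuning_cfg)          # dataclass -> plain dict
lora_kwargs["task_type"] = "CAUSAL_LM"    # mirrors the task_type injection above
hf_cfg = HFLoraConfig(**lora_kwargs)      # each key must be an HFLoraConfig kwarg
print(type(hf_cfg).__name__, hf_cfg.task_type)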

0 commit comments
