|
15 | 15 | # Standard |
16 | 16 | from dataclasses import dataclass, field |
17 | 17 | from enum import Enum |
18 | | -from typing import List |
| 18 | +from typing import List, Optional |
19 | 19 |
|
20 | 20 | # Third Party |
21 | 21 | from peft import LoraConfig as _LoraConfig |
@@ -51,9 +51,87 @@ class LoraConfig(_LoraConfig): |
51 | 51 | lora_dropout (`float`): |
52 | 52 | The dropout probability for Lora layers. |
53 | 53 | """ |
| 54 | + |
54 | 55 | lora_alpha: int = 32 |
55 | 56 | lora_dropout: float = 0.05 |
56 | 57 |
|
| 58 | + # HACK: The following arguments are re-declared here
| 59 | + # as a temporary fix which reduces their field annotations,
| 60 | + # e.g. from Optional[Union[List[str], str]] to Optional[List[str]].
| 61 | + # Please see: https://github.com/huggingface/transformers/issues/40915 for further explanation!
| 62 | + target_modules: Optional[List[str]] = field( |
| 63 | + default=None, |
| 64 | + metadata={ |
| 65 | + "help": ( |
| 66 | + "List of module names or regex expression of the module names to replace with LoRA. " |
| 67 | + "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. " |
| 68 | + "This can also be a wildcard 'all-linear' which matches all linear/Conv1D " |
| 69 | + "(if the model is a PreTrainedModel, the output layer excluded). " |
| 70 | + "If not specified, modules will be chosen according to the model architecture, If the architecture is " |
| 71 | + "not known, an error will be raised -- in this case, you should specify the target modules manually. " |
| 72 | + "To avoid targeting any modules (because you want to apply `target_parameters`), set " |
| 73 | + "`target_modules=[]`." |
| 74 | + ), |
| 75 | + }, |
| 76 | + ) |
| 77 | + exclude_modules: Optional[List[str]] = field(
| 78 | + default=None, |
| 79 | + metadata={ |
| 80 | + "help": "List of module names or regex expression of the module names to exclude from Lora." |
| 81 | + }, |
| 82 | + ) |
| 83 | + init_lora_weights: bool = field(
| 84 | + default=True, |
| 85 | + metadata={ |
| 86 | + "help": ( |
| 87 | + "How to initialize the weights of the LoRA layers. " |
| 88 | + "Passing True (default) results in the default initialization from the reference implementation from " |
| 89 | + "Microsoft, with the LoRA B weight being set to 0. This means that without further training, the LoRA " |
| 90 | + "adapter will be a no-op. " |
| 91 | + "Setting the initialization to False leads to random initialization of LoRA A and B, meaning that LoRA " |
| 92 | + "is not a no-op before training; this setting is intended for debugging purposes. " |
| 93 | + ), |
| 94 | + }, |
| 95 | + ) |
| 96 | + layers_to_transform: Optional[list[int]] = field( |
| 97 | + default=None, |
| 98 | + metadata={ |
| 99 | + "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index. " |
| 100 | + "This only works when target_modules is a list of str." |
| 101 | + }, |
| 102 | + ) |
| 103 | + layers_pattern: Optional[list[str]] = field( |
| 104 | + default=None, |
| 105 | + metadata={ |
| 106 | + "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." |
| 107 | + "This only works when target_modules is a list of str. This should target the `nn.ModuleList` of the " |
| 108 | + "model, which is often called `'layers'` or `'h'`." |
| 109 | + }, |
| 110 | + ) |
| 111 | + trainable_token_indices: Optional[list[int]] = field( |
| 112 | + default=None, |
| 113 | + metadata={ |
| 114 | + "help": ( |
| 115 | + "Lets you specify which token indices to selectively fine-tune without requiring to re-train the " |
| 116 | + "whole embedding matrix using the `peft.TrainableTokensModel` method. You can specify token indices " |
| 117 | + "in two ways. Either you specify a list of indices which will then target the model's input embedding " |
| 118 | + "layer (or, if not found, `embed_tokens`). (Not supported yet) Alternatively, you can specify a dictionary where the key " |
| 119 | + "is the name of the embedding module and the values are the list of token indices, e.g. " |
| 120 | + "`{'embed_tokens': [0, 1, ...]}`. Note that training with FSDP requires `use_orig_params=True` to " |
| 121 | + "avoid issues with non-uniform `requires_grad`." |
| 122 | + ) |
| 123 | + }, |
| 124 | + ) |
| 125 | + loftq_config: dict = field( |
| 126 | + default_factory=dict, |
| 127 | + metadata={ |
| 128 | + "help": ( |
| 129 | + "The configuration of LoftQ. If this is passed, then LoftQ will be used to quantize the backbone " |
| 130 | + "weights and initialize Lora layers. Also set `init_lora_weights='loftq'` in this case." |
| 131 | + ) |
| 132 | + }, |
| 133 | + ) |
| 134 | + |
57 | 135 | def __post_init__(self): |
58 | 136 | # If target_modules is a single-element list, convert it into a plain string |
59 | 137 | if self.target_modules == ["all-linear"]: |
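
For illustration, a minimal usage sketch of the re-declared fields follows. The import path is a hypothetical placeholder (the diff does not show the module name), and the ["all-linear"] behaviour is taken from the __post_init__ comment above; per the linked transformers issue, the simplified annotations appear aimed at keeping the dataclass consumable by standard argument-parsing tooling.

    # Sketch only: the import path below is a hypothetical placeholder.
    from tuning.config.peft_config import LoraConfig

    cfg = LoraConfig(
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],  # plain List[str]; no Union annotation needed
    )

    # Per the __post_init__ comment above, a single-element ["all-linear"] list
    # is folded back into the plain string "all-linear" that peft expects.
    cfg_all_linear = LoraConfig(target_modules=["all-linear"])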