|
15 | 15 | # Standard |
16 | 16 | from dataclasses import dataclass, field |
17 | 17 | from enum import Enum |
18 | | -from typing import List |
| 18 | +from typing import List, Optional |
19 | 19 |
|
20 | 20 | # Third Party |
21 | 21 | from peft import LoraConfig as _LoraConfig |
@@ -51,9 +51,87 @@ class LoraConfig(_LoraConfig): |
51 | 51 | lora_dropout (`float`): |
52 | 52 | The dropout probability for Lora layers. |
53 | 53 | """ |
| 54 | + |
54 | 55 | lora_alpha: int = 32 |
55 | 56 | lora_dropout: float = 0.05 |
56 | 57 |
|
| 58 | + # HACK: The following arguments are re-declared here
| 59 | + # as a temporary fix which reduces their field annotations,
| 60 | + # e.g. from Optional[Union[List[str], str]] to Optional[List[str]].
| 61 | + # Please see: https://github.com/huggingface/transformers/issues/40915 for further explanation!
| 62 | + target_modules: Optional[List[str]] = field( |
| 63 | + default=None, |
| 64 | + metadata={ |
| 65 | + "help": ( |
| 66 | + "List of module names or regex expression of the module names to replace with LoRA. " |
| 67 | + "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. " |
| 68 | + "This can also be a wildcard 'all-linear' which matches all linear/Conv1D " |
| 69 | + "(if the model is a PreTrainedModel, the output layer excluded). " |
| 70 | + "If not specified, modules will be chosen according to the model architecture, If the architecture is " |
| 71 | + "not known, an error will be raised -- in this case, you should specify the target modules manually. " |
| 72 | + "To avoid targeting any modules (because you want to apply `target_parameters`), set " |
| 73 | + "`target_modules=[]`." |
| 74 | + ), |
| 75 | + }, |
| 76 | + ) |
| 77 | + exclude_modules: Optional[List[str]] = field(
| 78 | + default=None, |
| 79 | + metadata={ |
| 80 | + "help": "List of module names or regex expression of the module names to exclude from Lora." |
| 81 | + }, |
| 82 | + ) |
| 83 | + init_lora_weights: bool = field(
| 84 | + default=True, |
| 85 | + metadata={ |
| 86 | + "help": ( |
| 87 | + "How to initialize the weights of the LoRA layers. " |
| 88 | + "Passing True (default) results in the default initialization from the reference implementation from " |
| 89 | + "Microsoft, with the LoRA B weight being set to 0. This means that without further training, the LoRA " |
| 90 | + "adapter will be a no-op. " |
| 91 | + "Setting the initialization to False leads to random initialization of LoRA A and B, meaning that LoRA " |
| 92 | + "is not a no-op before training; this setting is intended for debugging purposes. " |
| 93 | + ), |
| 94 | + }, |
| 95 | + ) |
| 96 | + layers_to_transform: Optional[list[int]] = field( |
| 97 | + default=None, |
| 98 | + metadata={ |
| 99 | + "help": "The layer indexes to transform, is this argument is specified, PEFT will transform only the layers indexes that are specified inside this list. If a single integer is passed, PEFT will transform only the layer at this index. " |
| 100 | + "This only works when target_modules is a list of str." |
| 101 | + }, |
| 102 | + ) |
| 103 | + layers_pattern: Optional[list[str]] = field( |
| 104 | + default=None, |
| 105 | + metadata={ |
| 106 | + "help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern." |
| 107 | + "This only works when target_modules is a list of str. This should target the `nn.ModuleList` of the " |
| 108 | + "model, which is often called `'layers'` or `'h'`." |
| 109 | + }, |
| 110 | + ) |
| 111 | + trainable_token_indices: Optional[list[int]] = field( |
| 112 | + default=None, |
| 113 | + metadata={ |
| 114 | + "help": ( |
| 115 | + "Lets you specify which token indices to selectively fine-tune without requiring to re-train the " |
| 116 | + "whole embedding matrix using the `peft.TrainableTokensModel` method. You can specify token indices " |
| 117 | + "in two ways. Either you specify a list of indices which will then target the model's input embedding " |
| 118 | + "layer (or, if not found, `embed_tokens`). (Not supported yet) Alternatively, you can specify a dictionary where the key " |
| 119 | + "is the name of the embedding module and the values are the list of token indices, e.g. " |
| 120 | + "`{'embed_tokens': [0, 1, ...]}`. Note that training with FSDP requires `use_orig_params=True` to " |
| 121 | + "avoid issues with non-uniform `requires_grad`." |
| 122 | + ) |
| 123 | + }, |
| 124 | + ) |
| 125 | + loftq_config: dict = field( |
| 126 | + default_factory=dict, |
| 127 | + metadata={ |
| 128 | + "help": ( |
| 129 | + "The configuration of LoftQ. If this is passed, then LoftQ will be used to quantize the backbone " |
| 130 | + "weights and initialize Lora layers. Also set `init_lora_weights='loftq'` in this case." |
| 131 | + ) |
| 132 | + }, |
| 133 | + ) |
| 134 | + |
57 | 135 | def __post_init__(self): |
58 | 136 | # If target_modules is a single-element list, convert it into a plain string |
59 | 137 | if self.target_modules == ["all-linear"]: |
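
For illustration, a minimal usage sketch of the re-declared fields follows. The import path is a hypothetical placeholder (the diff does not show the module name), and the ["all-linear"] behaviour is taken from the __post_init__ comment above; per the linked transformers issue, the simplified annotations appear aimed at keeping the dataclass consumable by standard argument-parsing tooling.

    # Sketch only: the import path below is a hypothetical placeholder.
    from tuning.config.peft_config import LoraConfig

    cfg = LoraConfig(
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],  # plain List[str]; no Union annotation needed
    )

    # Per the __post_init__ comment above, a single-element ["all-linear"] list
    # is folded back into the plain string "all-linear" that peft expects.
    cfg_all_linear = LoraConfig(target_modules=["all-linear"])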