2 files changed, +8 -8 lines

recipes/quickstart/finetuning
src/llama_recipes/configs

recipes/quickstart/finetuning

@@ -27,8 +27,8 @@ It lets us specify the training settings for everything from `model_name` to `da
 ``` python
 model_name: str = "PATH/to/Model"
 tokenizer_name: str = None
-enable_fsdp: bool = False
-low_cpu_fsdp: bool = False
+enable_fsdp: bool = False # shards model parameters, optimizer states and gradients across DDP ranks
+low_cpu_fsdp: bool = False # saves CPU memory by loading the pretrained model on rank 0 only
 run_validation: bool = True
 batch_size_training: int = 4
 batching_strategy: str = "packing" # alternative: padding
@@ -42,14 +42,14 @@ It lets us specify the training settings for everything from `model_name` to `da
 num_workers_dataloader: int = 1
 lr: float = 1e-4
 weight_decay: float = 0.0
-gamma: float = 0.85
+gamma: float = 0.85 # multiplicatively decay the learning rate by gamma after each epoch
 seed: int = 42
 use_fp16: bool = False
 mixed_precision: bool = True
 val_batch_size: int = 1
 dataset = "samsum_dataset"
 peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-use_peft: bool = False
+use_peft: bool = False # use parameter-efficient fine-tuning
 from_peft_checkpoint: str = "" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
 output_dir: str = "PATH/to/save/PEFT/model"
 freeze_layers: bool = False
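
The new `gamma` comment describes a per-epoch multiplicative learning-rate decay. The sketch below shows how that setting behaves with a standard PyTorch `StepLR` scheduler stepped once per epoch; it is a minimal illustration of the effect, not the exact scheduler wiring inside llama-recipes.

```python
# Minimal sketch: gamma as a per-epoch multiplicative LR decay.
# Assumes the training loop steps a StepLR scheduler once per epoch, as the
# new comment describes; the actual wiring in llama-recipes may differ.
import torch

model = torch.nn.Linear(8, 8)                                  # stand-in model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.85)

for epoch in range(3):
    # ... one epoch of training and validation would run here ...
    scheduler.step()                                           # lr *= gamma
    print(epoch, scheduler.get_last_lr())                      # ~8.5e-05, ~7.2e-05, ~6.1e-05
```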
src/llama_recipes/configs

@@ -8,8 +8,8 @@
 class train_config:
     model_name: str = "PATH/to/Model"
     tokenizer_name: str = None
-    enable_fsdp: bool = False
-    low_cpu_fsdp: bool = False
+    enable_fsdp: bool = False # shards model parameters, optimizer states and gradients across DDP ranks
+    low_cpu_fsdp: bool = False # saves CPU memory by loading the pretrained model on rank 0 only
     run_validation: bool = True
     batch_size_training: int = 4
     batching_strategy: str = "packing" # alternative: padding
@@ -23,14 +23,14 @@ class train_config:
     num_workers_dataloader: int = 1
     lr: float = 1e-4
     weight_decay: float = 0.0
-    gamma: float = 0.85
+    gamma: float = 0.85 # multiplicatively decay the learning rate by gamma after each epoch
     seed: int = 42
     use_fp16: bool = False
     mixed_precision: bool = True
     val_batch_size: int = 1
     dataset = "samsum_dataset"
     peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-    use_peft: bool = False
+    use_peft: bool = False # use parameter-efficient fine-tuning
     from_peft_checkpoint: str = "" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
     output_dir: str = "PATH/to/save/PEFT/model"
     freeze_layers: bool = False
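
For context on how these dataclass fields are consumed: the finetuning entry point lets `train_config` fields be overridden at launch time. The snippet below is a simplified stand-in for that override mechanism; the helper name `apply_overrides` and the field subset are illustrative assumptions, not the library's exact API.

```python
# Simplified stand-in for the config-override mechanism: keyword overrides are
# copied onto a train_config instance. Illustrative only, not the exact
# llama-recipes API.
from dataclasses import dataclass

@dataclass
class train_config:
    model_name: str = "PATH/to/Model"
    enable_fsdp: bool = False   # shards model parameters, optimizer states and gradients across DDP ranks
    low_cpu_fsdp: bool = False  # saves CPU memory by loading the pretrained model on rank 0 only
    use_peft: bool = False      # use parameter-efficient fine-tuning
    peft_method: str = "lora"

def apply_overrides(config, **kwargs):
    """Copy recognized keyword overrides onto the config instance."""
    for key, value in kwargs.items():
        if hasattr(config, key):
            setattr(config, key, value)

cfg = train_config()
apply_overrides(cfg, enable_fsdp=True, use_peft=True, peft_method="lora")
print(cfg)
```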