Commit 2f7001e

document less obvious training config parameters
1 parent 519c5a6 commit 2f7001e

2 files changed, 8 insertions(+), 8 deletions(-)

recipes/finetuning/README.md

Lines changed: 4 additions & 4 deletions

```diff
@@ -27,8 +27,8 @@ It lets us specify the training settings for everything from `model_name` to `da
 ```python
 model_name: str="PATH/to/Model"
 tokenizer_name: str=None
-enable_fsdp: bool=False
-low_cpu_fsdp: bool=False
+enable_fsdp: bool=False # shards model parameters, optimizer states and gradients across DDP ranks
+low_cpu_fsdp: bool=False # saves cpu memory by loading pretrained model on rank0 only
 run_validation: bool=True
 batch_size_training: int=4
 batching_strategy: str="packing" #alternative: padding
@@ -42,14 +42,14 @@ It lets us specify the training settings for everything from `model_name` to `da
 num_workers_dataloader: int=1
 lr: float=1e-4
 weight_decay: float=0.0
-gamma: float= 0.85
+gamma: float= 0.85 # multiplicatively decay the learning rate by gamma after each epoch
 seed: int=42
 use_fp16: bool=False
 mixed_precision: bool=True
 val_batch_size: int=1
 dataset = "samsum_dataset"
 peft_method: str = "lora" # None,llama_adapter, prefix
-use_peft: bool=False
+use_peft: bool=False # use parameter efficient fine tuning
 output_dir: str = "PATH/to/save/PEFT/model"
 freeze_layers: bool = False
 num_freeze_layers: int = 1
```
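The new `gamma` comment describes a per-epoch multiplicative decay. As a point of reference, here is a minimal sketch of what that schedule looks like with PyTorch's `StepLR`; the dummy model and optimizer choice are illustrative assumptions, not the recipe's actual training loop.

```python
# Illustrative sketch only: what "decay the learning rate by gamma after each
# epoch" means for the gamma field. Model/optimizer choices here are assumptions.
import torch
from torch.optim.lr_scheduler import StepLR

model = torch.nn.Linear(8, 8)                            # stand-in model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.0)
scheduler = StepLR(optimizer, step_size=1, gamma=0.85)   # lr *= 0.85 once per epoch

for epoch in range(3):
    # ... one epoch of training would run here ...
    scheduler.step()
    print(epoch, scheduler.get_last_lr())                # ~8.5e-05, ~7.2e-05, ~6.1e-05
```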

src/llama_recipes/configs/training.py

Lines changed: 4 additions & 4 deletions

```diff
@@ -8,8 +8,8 @@
 class train_config:
     model_name: str="PATH/to/Model"
     tokenizer_name: str=None
-    enable_fsdp: bool=False
-    low_cpu_fsdp: bool=False
+    enable_fsdp: bool=False # shards model parameters, optimizer states and gradients across DDP ranks
+    low_cpu_fsdp: bool=False # saves cpu memory by loading pretrained model on rank0 only
     run_validation: bool=True
     batch_size_training: int=4
     batching_strategy: str="packing" #alternative: padding
@@ -23,14 +23,14 @@ class train_config:
     num_workers_dataloader: int=1
     lr: float=1e-4
     weight_decay: float=0.0
-    gamma: float= 0.85
+    gamma: float= 0.85 # multiplicatively decay the learning rate by gamma after each epoch
     seed: int=42
     use_fp16: bool=False
     mixed_precision: bool=True
     val_batch_size: int=1
     dataset = "samsum_dataset"
     peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-    use_peft: bool=False
+    use_peft: bool=False # use parameter efficient fine tuning
     output_dir: str = "PATH/to/save/PEFT/model"
     freeze_layers: bool = False
     num_freeze_layers: int = 1
```
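The documented flags are consumed at model-setup time. The sketch below shows one plausible way a fine-tuning script could branch on `use_peft` and `enable_fsdp`; it is not the recipe's actual `finetuning.py`, and the LoRA hyperparameters are placeholder assumptions.

```python
# Hedged sketch: one way a training script might act on use_peft / enable_fsdp.
# Not llama_recipes' real setup code; LoRA values and wrapping policy are assumptions.
def setup_model(model, cfg):
    if cfg.use_peft and cfg.peft_method == "lora":
        from peft import LoraConfig, get_peft_model      # Hugging Face PEFT
        lora_cfg = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05,
                              target_modules=["q_proj", "v_proj"])  # placeholder values
        model = get_peft_model(model, lora_cfg)           # train only the LoRA adapters
    if cfg.enable_fsdp:
        from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
        # Requires an initialized torch.distributed process group; shards parameters,
        # gradients and optimizer state across DDP ranks, as the added comment says.
        model = FSDP(model)
    return model
```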
