
Commit eaded5e

document less obvious training config parameters (meta-llama#522)
2 parents: 3a99a54 + 52a85e1

2 files changed: +8, -8 lines

recipes/quickstart/finetuning/README.md

Lines changed: 4 additions & 4 deletions
@@ -27,8 +27,8 @@ It lets us specify the training settings for everything from `model_name` to `da
 ```python
 model_name: str="PATH/to/Model"
 tokenizer_name: str=None
-enable_fsdp: bool=False
-low_cpu_fsdp: bool=False
+enable_fsdp: bool=False # shards model parameters, optimizer states and gradients across DDP ranks
+low_cpu_fsdp: bool=False # saves cpu memory by loading pretrained model on rank0 only
 run_validation: bool=True
 batch_size_training: int=4
 batching_strategy: str="packing" #alternative: padding
@@ -42,14 +42,14 @@ It lets us specify the training settings for everything from `model_name` to `da
 num_workers_dataloader: int=1
 lr: float=1e-4
 weight_decay: float=0.0
-gamma: float= 0.85
+gamma: float= 0.85 # multiplicatively decay the learning rate by gamma after each epoch
 seed: int=42
 use_fp16: bool=False
 mixed_precision: bool=True
 val_batch_size: int=1
 dataset = "samsum_dataset"
 peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-use_peft: bool=False
+use_peft: bool=False # use parameter efficient fine tuning
 from_peft_checkpoint: str="" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
 output_dir: str = "PATH/to/save/PEFT/model"
 freeze_layers: bool = False
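
To make the new `gamma` comment concrete: a minimal sketch of per-epoch multiplicative learning-rate decay, assuming a standard PyTorch `StepLR` scheduler with `step_size=1` (the scheduler the recipe actually uses may differ). The `lr`, `weight_decay`, and `gamma` values mirror the defaults above; everything else is illustrative.

```python
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR

# toy parameter so the optimizer has something to track
param = torch.nn.Parameter(torch.zeros(1))
optimizer = AdamW([param], lr=1e-4, weight_decay=0.0)

# step_size=1 applies the decay once per epoch; gamma matches the config default
scheduler = StepLR(optimizer, step_size=1, gamma=0.85)

for epoch in range(3):
    # ... one epoch of training would run here ...
    optimizer.step()
    scheduler.step()
    print(f"epoch {epoch}: lr = {scheduler.get_last_lr()[0]:.2e}")
# learning rate shrinks by a factor of 0.85 every epoch: 1.0e-04 -> 8.5e-05 -> ~7.2e-05 -> ...
```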

src/llama_recipes/configs/training.py

Lines changed: 4 additions & 4 deletions
@@ -8,8 +8,8 @@
 class train_config:
     model_name: str="PATH/to/Model"
     tokenizer_name: str=None
-    enable_fsdp: bool=False
-    low_cpu_fsdp: bool=False
+    enable_fsdp: bool=False # shards model parameters, optimizer states and gradients across DDP ranks
+    low_cpu_fsdp: bool=False # saves cpu memory by loading pretrained model on rank0 only
     run_validation: bool=True
     batch_size_training: int=4
     batching_strategy: str="packing" #alternative: padding
@@ -23,14 +23,14 @@ class train_config:
     num_workers_dataloader: int=1
     lr: float=1e-4
     weight_decay: float=0.0
-    gamma: float= 0.85
+    gamma: float= 0.85 # multiplicatively decay the learning rate by gamma after each epoch
     seed: int=42
     use_fp16: bool=False
     mixed_precision: bool=True
     val_batch_size: int=1
     dataset = "samsum_dataset"
     peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-    use_peft: bool=False
+    use_peft: bool=False # use parameter efficient fine tuning
     from_peft_checkpoint: str="" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
     output_dir: str = "PATH/to/save/PEFT/model"
     freeze_layers: bool = False
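
For reference, a hypothetical sketch of how calling code might consume the documented flags, assuming `train_config` is a plain dataclass. Only the field subset touched by this diff is reproduced; the override pattern and print statements are illustrative, not the repo's actual entry point.

```python
from dataclasses import dataclass, replace

@dataclass
class train_config:                  # field subset from the diff above
    model_name: str = "PATH/to/Model"
    enable_fsdp: bool = False        # shard params, grads and optimizer states across DDP ranks
    low_cpu_fsdp: bool = False       # load pretrained weights on rank 0 only to save CPU memory
    lr: float = 1e-4
    gamma: float = 0.85              # per-epoch multiplicative LR decay
    peft_method: str = "lora"
    use_peft: bool = False           # parameter-efficient fine-tuning

# override defaults for a PEFT + FSDP run
cfg = replace(train_config(), enable_fsdp=True, low_cpu_fsdp=True, use_peft=True)

if cfg.use_peft:
    print(f"wrapping the model with {cfg.peft_method} adapters")
if cfg.enable_fsdp and cfg.low_cpu_fsdp:
    print("rank 0 loads full weights; other ranks initialize without materializing them")
```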
