2 files changed, +8 -8 lines

recipes/quickstart/finetuning
src/llama_recipes/configs

recipes/quickstart/finetuning

@@ -27,8 +27,8 @@ It lets us specify the training settings for everything from `model_name` to `da
 ``` python
 model_name: str = "PATH/to/Model"
 tokenizer_name: str = None
-enable_fsdp: bool = False
-low_cpu_fsdp: bool = False
+enable_fsdp: bool = False # shards model parameters, optimizer states and gradients across DDP ranks
+low_cpu_fsdp: bool = False # saves CPU memory by loading the pretrained model on rank 0 only
 run_validation: bool = True
 batch_size_training: int = 4
 batching_strategy: str = "packing" # alternative: padding
@@ -42,14 +42,14 @@ It lets us specify the training settings for everything from `model_name` to `da
 num_workers_dataloader: int = 1
 lr: float = 1e-4
 weight_decay: float = 0.0
-gamma: float = 0.85
+gamma: float = 0.85 # multiplicatively decay the learning rate by gamma after each epoch
 seed: int = 42
 use_fp16: bool = False
 mixed_precision: bool = True
 val_batch_size: int = 1
 dataset = "samsum_dataset"
 peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-use_peft: bool = False
+use_peft: bool = False # use parameter-efficient fine-tuning
 from_peft_checkpoint: str = "" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
 output_dir: str = "PATH/to/save/PEFT/model"
 freeze_layers: bool = False
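
The new `gamma` comment describes a per-epoch multiplicative learning-rate decay. The sketch below shows how that setting behaves with a standard PyTorch `StepLR` scheduler stepped once per epoch; it is a minimal illustration of the effect, not the exact scheduler wiring inside llama-recipes.

```python
# Minimal sketch: gamma as a per-epoch multiplicative LR decay.
# Assumes the training loop steps a StepLR scheduler once per epoch, as the
# new comment describes; the actual wiring in llama-recipes may differ.
import torch

model = torch.nn.Linear(8, 8)                                  # stand-in model
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.0)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.85)

for epoch in range(3):
    # ... one epoch of training and validation would run here ...
    scheduler.step()                                           # lr *= gamma
    print(epoch, scheduler.get_last_lr())                      # ~8.5e-05, ~7.2e-05, ~6.1e-05
```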
src/llama_recipes/configs

@@ -8,8 +8,8 @@
 class train_config:
     model_name: str = "PATH/to/Model"
     tokenizer_name: str = None
-    enable_fsdp: bool = False
-    low_cpu_fsdp: bool = False
+    enable_fsdp: bool = False # shards model parameters, optimizer states and gradients across DDP ranks
+    low_cpu_fsdp: bool = False # saves CPU memory by loading the pretrained model on rank 0 only
     run_validation: bool = True
     batch_size_training: int = 4
     batching_strategy: str = "packing" # alternative: padding
@@ -23,14 +23,14 @@ class train_config:
     num_workers_dataloader: int = 1
     lr: float = 1e-4
     weight_decay: float = 0.0
-    gamma: float = 0.85
+    gamma: float = 0.85 # multiplicatively decay the learning rate by gamma after each epoch
     seed: int = 42
     use_fp16: bool = False
     mixed_precision: bool = True
     val_batch_size: int = 1
     dataset = "samsum_dataset"
     peft_method: str = "lora" # None, llama_adapter (Caution: llama_adapter is currently not supported with FSDP)
-    use_peft: bool = False
+    use_peft: bool = False # use parameter-efficient fine-tuning
     from_peft_checkpoint: str = "" # if not empty and use_peft=True, will load the peft checkpoint and resume the fine-tuning on that checkpoint
     output_dir: str = "PATH/to/save/PEFT/model"
     freeze_layers: bool = False
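
For context on how these dataclass fields are consumed: the finetuning entry point lets `train_config` fields be overridden at launch time. The snippet below is a simplified stand-in for that override mechanism; the helper name `apply_overrides` and the field subset are illustrative assumptions, not the library's exact API.

```python
# Simplified stand-in for the config-override mechanism: keyword overrides are
# copied onto a train_config instance. Illustrative only, not the exact
# llama-recipes API.
from dataclasses import dataclass

@dataclass
class train_config:
    model_name: str = "PATH/to/Model"
    enable_fsdp: bool = False   # shards model parameters, optimizer states and gradients across DDP ranks
    low_cpu_fsdp: bool = False  # saves CPU memory by loading the pretrained model on rank 0 only
    use_peft: bool = False      # use parameter-efficient fine-tuning
    peft_method: str = "lora"

def apply_overrides(config, **kwargs):
    """Copy recognized keyword overrides onto the config instance."""
    for key, value in kwargs.items():
        if hasattr(config, key):
            setattr(config, key, value)

cfg = train_config()
apply_overrides(cfg, enable_fsdp=True, use_peft=True, peft_method="lora")
print(cfg)
```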