
Commit 1b6a16e

Add help strings for the CLI (#1092)

awaelchli committed
Co-authored-by: Carlos Mocholí <[email protected]>
Co-authored-by: rasbt <[email protected]>

1 parent 2c2eba5 commit 1b6a16e

File tree

13 files changed: +563, -95 lines

Lines changed: 68 additions & 11 deletions
@@ -1,39 +1,96 @@
+
+# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
+checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
+
+# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/finetune/full)
+out_dir: out/finetune/full-llama2-7b
+
+# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
 precision: bf16-true
+
+# How many devices/GPUs to use (type: Union[int, str], default: 1)
 devices: 4
+
+# Path to a checkpoint directory to resume from in case training was interrupted, or ``True`` to resume
+# from the latest checkpoint in ``out_dir``. (type: Union[bool, Path], default: False)
 resume: false
-seed: 1337
+
+# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
 data:
   class_path: litgpt.data.AlpacaGPT4
   init_args:
     mask_prompt: false
     val_split_fraction: 0.03847
-    prompt_style: "alpaca"
+    prompt_style: alpaca
     ignore_index: -100
     seed: 42
     num_workers: 4
-    download_dir: data/alpacagpt4
-    file_url: https://raw.githubusercontent.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/main/data/alpaca_gpt4_data.json
-    file_name: alpacagpt4_data_cleaned_archive.json
-checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
-out_dir: out/finetune/full-llama2-7b
+
+# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
 train:
+
+  # Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
   save_interval: 200
+
+  # Number of iterations between logging calls (type: int, default: 1)
   log_interval: 1
+
+  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 64)
   global_batch_size: 64
+
+  # Number of samples per data-parallel rank (type: int, default: 1)
   micro_batch_size: 4
+
+  # Number of iterations with learning rate warmup active (type: int, default: 100)
   lr_warmup_steps: 25
+
+  # Number of epochs to train on (type: Optional[int], default: 5)
   epochs: 1
-  max_tokens: null
-  max_steps: null
+
+  # Total number of tokens to train on (type: Optional[int], default: null)
+  max_tokens:
+
+  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
+  max_steps:
+
+  # Limits the length of samples. Off by default (type: Optional[int], default: null)
   max_seq_length: 512
-  tie_embeddings: null
+
+  # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
+  tie_embeddings:
+
+  # (type: float, default: 0.003)
   learning_rate: 0.0002
+
+  # (type: float, default: 0.02)
   weight_decay: 0.1
+
+  # (type: float, default: 0.9)
   beta1: 0.9
+
+  # (type: float, default: 0.95)
   beta2: 0.95
-  max_norm: null
+
+  # (type: Optional[float], default: null)
+  max_norm:
+
+  # (type: float, default: 6e-05)
   min_lr: 6.0e-05
+
+# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
+
+  # Number of optimizer steps between evaluation calls (type: int, default: 600)
   interval: 100
+
+  # Number of tokens to generate (type: Optional[int], default: 100)
   max_new_tokens: 100
+
+  # Number of iterations (type: int, default: 100)
   max_iters: 100
+
+# The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
+logger_name: csv
+
+# The random seed to use for reproducibility. (type: int, default: 1337)
+seed: 1337
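
The help strings added above are ordinary YAML comments, so the file parses exactly as before, and keys left without a value (max_tokens:, max_steps:, tie_embeddings:, max_norm:) load as null, matching their documented defaults. A minimal sketch with PyYAML, assuming the finished config is saved locally under the hypothetical name full-llama2-7b.yaml:

# Sketch: confirm the commented config still parses and that blank values become None.
# Assumes PyYAML is installed; "full-llama2-7b.yaml" is a hypothetical local copy of the file above.
import yaml

with open("full-llama2-7b.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["train"]["max_tokens"] is None      # "max_tokens:" with no value parses as null/None
assert cfg["train"]["tie_embeddings"] is None  # same for the other blank keys
print(cfg["checkpoint_dir"])                   # checkpoints/meta-llama/Llama-2-7b-hf
print(cfg["train"]["micro_batch_size"])        # 4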
Lines changed: 86 additions & 12 deletions
@@ -1,48 +1,122 @@
+
+# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
+checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
+
+# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/lora)
+out_dir: out/finetune/lora-llama2-7b
+
+# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
 precision: bf16-true
-quantize: null
+
+# If set, quantize the model with this algorithm. See ``tutorials/quantize.md`` for more information. (type: Optional[Literal['nf4', 'nf4-dq', 'fp4', 'fp4-dq', 'int8-training']], default: null)
+quantize:
+
+# How many devices/GPUs to use. (type: Union[int, str], default: 1)
 devices: 1
-seed: 1337
+
+# The LoRA rank. (type: int, default: 8)
 lora_r: 32
+
+# The LoRA alpha. (type: int, default: 16)
 lora_alpha: 16
+
+# The LoRA dropout value. (type: float, default: 0.05)
 lora_dropout: 0.05
+
+# Whether to apply LoRA to the query weights in attention. (type: bool, default: True)
 lora_query: true
+
+# Whether to apply LoRA to the key weights in attention. (type: bool, default: False)
 lora_key: false
+
+# Whether to apply LoRA to the value weights in attention. (type: bool, default: True)
 lora_value: true
+
+# Whether to apply LoRA to the output projection in the attention block. (type: bool, default: False)
 lora_projection: false
+
+# Whether to apply LoRA to the weights of the MLP in the attention block. (type: bool, default: False)
 lora_mlp: false
+
+# Whether to apply LoRA to output head in GPT. (type: bool, default: False)
 lora_head: false
+
+# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
 data:
   class_path: litgpt.data.AlpacaGPT4
   init_args:
     mask_prompt: false
     val_split_fraction: 0.03847
-    prompt_style: "alpaca"
+    prompt_style: alpaca
     ignore_index: -100
     seed: 42
     num_workers: 4
-    download_dir: data/alpacagpt4
-    file_url: https://raw.githubusercontent.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/main/data/alpaca_gpt4_data.json
-    file_name: alpacagpt4_data_cleaned_archive.json
-checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
-out_dir: out/finetune/lora-llama2-7b
+
+# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
 train:
+
+  # Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
   save_interval: 200
+
+  # Number of iterations between logging calls (type: int, default: 1)
   log_interval: 1
+
+  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 128)
   global_batch_size: 8
+
+  # Number of samples per data-parallel rank (type: int, default: 4)
   micro_batch_size: 2
+
+  # Number of iterations with learning rate warmup active (type: int, default: 100)
   lr_warmup_steps: 10
+
+  # Number of epochs to train on (type: Optional[int], default: 5)
   epochs: 1
-  max_tokens: null
-  max_steps: null
+
+  # Total number of tokens to train on (type: Optional[int], default: null)
+  max_tokens:
+
+  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
+  max_steps:
+
+  # Limits the length of samples. Off by default (type: Optional[int], default: null)
   max_seq_length: 512
-  tie_embeddings: null
+
+  # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
+  tie_embeddings:
+
+  # (type: float, default: 0.0003)
   learning_rate: 0.0002
+
+  # (type: float, default: 0.02)
   weight_decay: 0.0
+
+  # (type: float, default: 0.9)
   beta1: 0.9
+
+  # (type: float, default: 0.95)
   beta2: 0.95
-  max_norm: null
+
+  # (type: Optional[float], default: null)
+  max_norm:
+
+  # (type: float, default: 6e-05)
   min_lr: 6.0e-05
+
+# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
+
+  # Number of optimizer steps between evaluation calls (type: int, default: 100)
   interval: 100
+
+  # Number of tokens to generate (type: Optional[int], default: 100)
   max_new_tokens: 100
+
+  # Number of iterations (type: int, default: 100)
   max_iters: 100
+
+# The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
+logger_name: csv
+
+# The random seed to use for reproducibility. (type: int, default: 1337)
+seed: 1337
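
As a quick sanity check on the LoRA hyperparameters above: in the standard LoRA formulation the low-rank update is scaled by lora_alpha / lora_r, and the global/micro batch-size pair implies a gradient-accumulation factor. The sketch below spells out that arithmetic; the accumulation formula is the usual convention rather than something read off this diff, so treat it as an assumption about how the trainer interprets these fields.

# Sketch: quantities implied by the LoRA config above (conventional formulas, not litgpt source).
lora_r, lora_alpha = 32, 16
scaling = lora_alpha / lora_r  # 0.5 -- factor applied to the low-rank update in standard LoRA

devices = 1
global_batch_size, micro_batch_size = 8, 2
grad_accum_steps = global_batch_size // (micro_batch_size * devices)  # 4 micro-batches per optimizer step
print(scaling, grad_accum_steps)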
Lines changed: 87 additions & 13 deletions
@@ -1,48 +1,122 @@
+
+# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
+checkpoint_dir: checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
+
+# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/lora)
+out_dir: out/finetune/lora-tiny-llama-1.1b
+
+# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
 precision: bf16-true
-quantize: null
+
+# If set, quantize the model with this algorithm. See ``tutorials/quantize.md`` for more information. (type: Optional[Literal['nf4', 'nf4-dq', 'fp4', 'fp4-dq', 'int8-training']], default: null)
+quantize:
+
+# How many devices/GPUs to use. (type: Union[int, str], default: 1)
 devices: 1
-seed: 1337
+
+# The LoRA rank. (type: int, default: 8)
 lora_r: 32
+
+# The LoRA alpha. (type: int, default: 16)
 lora_alpha: 16
+
+# The LoRA dropout value. (type: float, default: 0.05)
 lora_dropout: 0.05
+
+# Whether to apply LoRA to the query weights in attention. (type: bool, default: True)
 lora_query: true
+
+# Whether to apply LoRA to the key weights in attention. (type: bool, default: False)
 lora_key: false
+
+# Whether to apply LoRA to the value weights in attention. (type: bool, default: True)
 lora_value: true
+
+# Whether to apply LoRA to the output projection in the attention block. (type: bool, default: False)
 lora_projection: false
+
+# Whether to apply LoRA to the weights of the MLP in the attention block. (type: bool, default: False)
 lora_mlp: false
+
+# Whether to apply LoRA to output head in GPT. (type: bool, default: False)
 lora_head: false
+
+# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
 data:
   class_path: litgpt.data.AlpacaGPT4
   init_args:
     mask_prompt: false
     val_split_fraction: 0.03847
-    prompt_style: "alpaca"
+    prompt_style: alpaca
     ignore_index: -100
     seed: 42
     num_workers: 4
-    download_dir: data/alpacagpt4
-    file_url: https://raw.githubusercontent.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/main/data/alpaca_gpt4_data.json
-    file_name: alpacagpt4_data_cleaned_archive.json
-checkpoint_dir: checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
-out_dir: out/finetune/lora-tiny-llama-1.1b
+
+# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
 train:
+
+  # Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
   save_interval: 800
+
+  # Number of iterations between logging calls (type: int, default: 1)
   log_interval: 1
+
+  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 128)
   global_batch_size: 8
+
+  # Number of samples per data-parallel rank (type: int, default: 4)
   micro_batch_size: 8
+
+  # Number of iterations with learning rate warmup active (type: int, default: 100)
   lr_warmup_steps: 10
+
+  # Number of epochs to train on (type: Optional[int], default: 5)
   epochs: 1
-  max_tokens: null
-  max_steps: null
+
+  # Total number of tokens to train on (type: Optional[int], default: null)
+  max_tokens:
+
+  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
+  max_steps:
+
+  # Limits the length of samples. Off by default (type: Optional[int], default: null)
   max_seq_length: 512
-  tie_embeddings: null
+
+  # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
+  tie_embeddings:
+
+  # (type: float, default: 0.0003)
   learning_rate: 0.0002
+
+  # (type: float, default: 0.02)
   weight_decay: 0.0
+
+  # (type: float, default: 0.9)
   beta1: 0.9
+
+  # (type: float, default: 0.95)
   beta2: 0.95
-  max_norm: null
+
+  # (type: Optional[float], default: null)
+  max_norm:
+
+  # (type: float, default: 6e-05)
   min_lr: 6.0e-05
+
+# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
+
+  # Number of optimizer steps between evaluation calls (type: int, default: 100)
   interval: 400
+
+  # Number of tokens to generate (type: Optional[int], default: 100)
   max_new_tokens: 100
-  max_iters: 100
+
+  # Number of iterations (type: int, default: 100)
+  max_iters: 100
+
+# The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
+logger_name: csv
+
+# The random seed to use for reproducibility. (type: int, default: 1337)
+seed: 1337
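
The data: block in all three files follows the jsonargparse class_path / init_args convention that the litgpt CLI builds on: the class named by class_path is imported and constructed with init_args as keyword arguments. A generic sketch of that resolution is shown below; it is illustrative only (the real CLI delegates this step to jsonargparse), assumes a litgpt version matching this commit is installed, and uses a hypothetical local copy named lora-tiny-llama-1.1b.yaml.

# Sketch: resolve a class_path / init_args block by hand, in the jsonargparse style.
# Illustrative only -- the litgpt CLI performs this step through jsonargparse.
import importlib
import yaml

with open("lora-tiny-llama-1.1b.yaml") as f:   # hypothetical local copy of the YAML above
    cfg = yaml.safe_load(f)

module_name, class_name = cfg["data"]["class_path"].rsplit(".", 1)
data_cls = getattr(importlib.import_module(module_name), class_name)  # litgpt.data.AlpacaGPT4
datamodule = data_cls(**cfg["data"]["init_args"])  # mask_prompt=False, seed=42, ...
print(type(datamodule).__name__)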

0 commit comments
