
Commit 24f859a

configs

Signed-off-by: adil-a <adil.asif2000@hotmail.com>
1 parent 9638e80

File tree

2 files changed: +202 −0 lines changed

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To run this recipe, please use the following command:
# torchrun --nproc-per-node=8 recipes/llm_finetune/finetune.py --config recipes/llm_finetune/nemotron/llama3_3_nemotron_super_49B_squad_peft.yaml

step_scheduler:
  global_batch_size: 16
  local_batch_size: 1
  ckpt_every_steps: 1000
  val_every_steps: 1000  # run validation every this many gradient steps
  max_steps: 100

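Read together with the torchrun command above, these fields imply the usual gradient-accumulation arithmetic. A quick sanity check under that assumption (the exact semantics live in the trainer, not in this file):

    world_size = 8                       # from torchrun --nproc-per-node=8
    global_batch_size, local_batch_size = 16, 1
    grad_accum_steps = global_batch_size // (local_batch_size * world_size)
    print(grad_accum_steps)              # 2 micro-batches per rank per optimizer step
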
dist_env:
  backend: nccl
  timeout_minutes: 1

rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 1111
  ranked: true

model:
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  pretrained_model_name_or_path: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16

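Every block with a _target_ key follows the Hydra-style instantiation pattern: the dotted path is imported and called with the sibling keys as keyword arguments. A minimal sketch of that resolution in Python (an illustration only, not NeMo Automodel's actual loader):

    import importlib

    def instantiate(cfg: dict):
        """Import cfg['_target_'] and call it with the remaining keys as kwargs."""
        cfg = dict(cfg)
        parts = cfg.pop("_target_").split(".")
        # Import the longest importable module prefix, then walk attributes;
        # this handles targets like package.Class.from_pretrained.
        for i in range(len(parts), 0, -1):
            try:
                obj = importlib.import_module(".".join(parts[:i]))
                break
            except ModuleNotFoundError:
                continue
        for attr in parts[i:]:
            obj = getattr(obj, attr)
        return obj(**cfg)

    # e.g. instantiate({"_target_": "torch.optim.Adam", "params": params, "lr": 1.0e-5})
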
# torch.compile configuration
compile:
  enabled: false
  mode: "default"  # Options: "default", "reduce-overhead", "max-autotune"
  fullgraph: false
  dynamic: true  # Set to false for better performance with fixed shapes
  backend: null  # Use the default backend (inductor)

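compile.enabled is false here, but the natural reading of this block is a pass-through to torch.compile. A short sketch under that assumption (the Linear layer stands in for the loaded causal LM):

    import torch

    model = torch.nn.Linear(8, 8)  # stand-in for the loaded model
    cfg = {"enabled": True, "mode": "default", "fullgraph": False,
           "dynamic": True, "backend": None}
    if cfg["enabled"]:
        model = torch.compile(
            model,
            mode=cfg["mode"],
            fullgraph=cfg["fullgraph"],
            dynamic=cfg["dynamic"],
            backend=cfg["backend"] or "inductor",  # null -> default inductor backend
        )
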
distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1  # dp_shard_size = dp_size / dp_replicate_size and dp_shard_size < dp_size; for the DDP use case, use DDPManager
  tp_size: 1
  cp_size: 1
  sequence_parallel: false
  defer_fsdp_grad_sync: false

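With dp_size left as none, the common convention (an assumption here; this file does not spell it out) is to derive the data-parallel size from the world size and the other mesh axes:

    world_size = 8                                # --nproc-per-node=8
    tp_size, cp_size = 1, 1
    dp_size = world_size // (tp_size * cp_size)   # 8
    dp_replicate_size = 1
    dp_shard_size = dp_size // dp_replicate_size  # 8: pure FSDP2 sharding, no replication
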
loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy

dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: train

packed_sequence:
  packed_sequence_size: 0

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: True

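StatefulDataLoader is torchdata's checkpointable drop-in for torch.utils.data.DataLoader: its state_dict()/load_state_dict() let a resumed job continue mid-epoch rather than restarting the epoch. A toy illustration (a plain list stands in for the SQuAD dataset):

    from torchdata.stateful_dataloader import StatefulDataLoader

    loader = StatefulDataLoader(list(range(100)), batch_size=4)
    it = iter(loader)
    next(it); next(it)             # consume two batches
    state = loader.state_dict()    # iteration position is captured here

    resumed = StatefulDataLoader(list(range(100)), batch_size=4)
    resumed.load_state_dict(state)
    print(next(iter(resumed)))     # expected: tensor([ 8,  9, 10, 11])
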
validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: validation
  limit_dataset_samples: 64

validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater

optimizer:
  _target_: torch.optim.Adam
  betas: [0.9, 0.999]
  eps: 1e-8
  lr: 1.0e-5
  weight_decay: 0

lr_scheduler:
  lr_decay_style: cosine
  min_lr: 1.0e-6

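The scheduler block only fixes the decay style and the floor; warmup and other details are up to the trainer. For intuition, a plain cosine decay consistent with these numbers:

    import math

    def cosine_lr(step, max_steps=100, lr=1.0e-5, min_lr=1.0e-6):
        frac = min(step / max_steps, 1.0)
        return min_lr + 0.5 * (lr - min_lr) * (1.0 + math.cos(math.pi * frac))

    print(cosine_lr(0))    # 1e-05 at step 0
    print(cosine_lr(50))   # 5.5e-06 midway
    print(cosine_lr(100))  # 1e-06 at max_steps
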
# wandb:
#   project: <your_wandb_project>
#   entity: <your_wandb_entity>
#   name: <your_wandb_exp_name>
#   save_dir: <your_wandb_save_dir>
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To run this recipe, please use the following command:
# torchrun --nproc-per-node=8 recipes/llm_finetune/finetune.py --config recipes/llm_finetune/nemotron/llama3_3_nemotron_super_49B_squad_peft.yaml

step_scheduler:
  global_batch_size: 8
  local_batch_size: 1
  ckpt_every_steps: 1000
  val_every_steps: 1000  # run validation every this many gradient steps
  max_steps: 100

dist_env:
  backend: nccl
  timeout_minutes: 1

rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 1111
  ranked: true

model:
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  pretrained_model_name_or_path: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16

# torch.compile configuration
compile:
  enabled: false
  mode: "default"  # Options: "default", "reduce-overhead", "max-autotune"
  fullgraph: false
  dynamic: true  # Set to false for better performance with fixed shapes
  backend: null  # Use the default backend (inductor)

peft:
  _target_: nemo_automodel.components._peft.lora.PeftConfig
  match_all_linear: True
  dim: 8
  alpha: 32
  use_triton: True

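dim and alpha here are the usual LoRA rank and scaling knobs. Assuming the standard LoRA formulation (whether PeftConfig applies exactly the alpha/dim scaling is an assumption), the effective forward pass for a matched linear layer is:

    import torch

    dim, alpha = 8, 32                          # from the peft block above -> scale = 4.0
    base = torch.nn.Linear(64, 64, bias=False)  # one of the matched linear layers
    A = torch.randn(dim, 64) * 0.01             # trainable low-rank factors
    B = torch.zeros(64, dim)                    # zero-init so training starts at the base model
    x = torch.randn(2, 64)
    y = base(x) + (alpha / dim) * (x @ A.T @ B.T)
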
distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1  # dp_shard_size = dp_size / dp_replicate_size and dp_shard_size < dp_size; for the DDP use case, use DDPManager
  tp_size: 1
  cp_size: 1
  sequence_parallel: false

loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy

dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: train

packed_sequence:
  packed_sequence_size: 0

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: True

validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: validation
  limit_dataset_samples: 64

validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater

optimizer:
  _target_: torch.optim.Adam
  betas: [0.9, 0.999]
  eps: 1e-8
  lr: 1.0e-5
  weight_decay: 0

lr_scheduler:
  lr_decay_style: cosine
  min_lr: 1.0e-6

# wandb:
#   project: <your_wandb_project>
#   entity: <your_wandb_entity>
#   name: <your_wandb_exp_name>
#   save_dir: <your_wandb_save_dir>
