# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# To run this recipe, please use the following command:
# torchrun --nproc-per-node=8 recipes/llm_finetune/finetune.py --config recipes/llm_finetune/nemotron/llama3_3_nemotron_super_49B_squad_peft.yaml
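# For a quick single-GPU smoke test (assuming the model fits in memory with the
# LoRA adapters configured below), the same entry point can be launched with fewer processes:
# torchrun --nproc-per-node=1 recipes/llm_finetune/finetune.py --config recipes/llm_finetune/nemotron/llama3_3_nemotron_super_49B_squad_peft.yaml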


step_scheduler:
  global_batch_size: 8
  local_batch_size: 1
  ckpt_every_steps: 1000
  val_every_steps: 1000  # run validation every this many gradient steps
  max_steps: 100
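  # Illustrative arithmetic (assuming the 8 data-parallel ranks from the torchrun
  # command above): gradient-accumulation steps = global_batch_size /
  # (local_batch_size * data-parallel ranks) = 8 / (1 * 8) = 1, i.e. one optimizer
  # step per forward/backward pass.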

dist_env:
  backend: nccl
  timeout_minutes: 1

rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 1111
  ranked: true

model:
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  pretrained_model_name_or_path: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
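  # Other Hugging Face causal-LM checkpoint ids or local paths can be substituted
  # here, e.g. (hypothetical alternative, not part of this recipe):
  # pretrained_model_name_or_path: meta-llama/Llama-3.1-8B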

# torch.compile configuration
compile:
  enabled: false
  mode: "default"  # Options: "default", "reduce-overhead", "max-autotune"
  fullgraph: false
  dynamic: true  # Set to false for better performance with fixed shapes
  backend: null  # Use default backend (inductor)
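  # To try compilation on fixed-shape batches, a reasonable starting point using
  # only the keys above would be (sketch, not benchmarked):
  # enabled: true
  # mode: "max-autotune"
  # dynamic: false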

peft:
  _target_: nemo_automodel.components._peft.lora.PeftConfig
  match_all_linear: True
  dim: 8
  alpha: 32
  use_triton: True
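  # Under the usual LoRA convention (scaling = alpha / rank), dim: 8 and alpha: 32
  # give an effective scale of 32 / 8 = 4 on the low-rank update applied to every
  # matched linear layer.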

distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1  # dp_shard_size = dp_size / dp_replicate_size. For the pure DDP use case, use DDPManager instead.
  tp_size: 1
  cp_size: 1
  sequence_parallel: false
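  # Sketch of how the device mesh resolves with the 8 processes launched above:
  # tp_size = cp_size = 1, so dp_size falls back to the world size (8), and with
  # dp_replicate_size = 1 the whole data-parallel dimension is FSDP-sharded
  # (dp_shard_size = 8 / 1 = 8).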

loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy

dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: train

packed_sequence:
  packed_sequence_size: 0
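  # Packing is disabled here (size 0); a positive value would pack multiple SQuAD
  # samples into fixed-length sequences of that many tokens, e.g.:
  # packed_sequence_size: 2048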

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: True

validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: validation
  limit_dataset_samples: 64
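  # limit_dataset_samples caps validation at 64 examples so the periodic eval at
  # val_every_steps stays cheap; raise or remove it to validate on the full split.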

validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater

optimizer:
  _target_: torch.optim.Adam
  betas: [0.9, 0.999]
  eps: 1e-8
  lr: 1.0e-5
  weight_decay: 0

lr_scheduler:
  lr_decay_style: cosine
  min_lr: 1.0e-6
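  # Net effect (assuming the scheduler runs over the full max_steps horizon): the
  # learning rate decays along a cosine curve from the Adam lr of 1.0e-5 down to
  # min_lr of 1.0e-6.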

# wandb:
#   project: <your_wandb_project>
#   entity: <your_wandb_entity>
#   name: <your_wandb_exp_name>
#   save_dir: <your_wandb_save_dir>