99 changes: 99 additions & 0 deletions examples/llm_finetune/devstral/devstral2_small_2512_squad.yaml
@@ -0,0 +1,99 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# To run this recipe, please use the following command:
# torchrun --nproc-per-node=8 examples/llm_finetune/finetune.py --config examples/llm_finetune/devstral/devstral2_small_2512_squad.yaml
# Adjust --nproc-per-node to the number of GPUs available on your host machine.


step_scheduler:
  global_batch_size: 64
  local_batch_size: 1
  ckpt_every_steps: 200
  val_every_steps: 100 # run validation every this many gradient steps
  num_epochs: 1

dist_env:
  backend: nccl
  timeout_minutes: 1

rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 1111
  ranked: true

model:
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  pretrained_model_name_or_path: akoumpa/Devstral-Small-2-24B-Instruct-2512-BF16

checkpoint:
  enabled: true
  checkpoint_dir: checkpoints/
  model_save_format: torch_save # torch_save or safetensors
  save_consolidated: false # saves the model in a consolidated safetensors format. Requires model_save_format to be safetensors.

distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1 # dp_shard_size = dp_size / dp_replicate_size, with dp_shard_size < dp_size. For the DDP use case, use DDPManager.
  tp_size: 1
  cp_size: 1
  sequence_parallel: false

loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy

dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: train

packed_sequence:
  # Set packed_sequence_size > 0 to run with packed sequences
  packed_sequence_size: 0

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: true


validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: validation
  limit_dataset_samples: 64

validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater

optimizer:
  _target_: torch.optim.Adam
  betas: [0.9, 0.999]
  eps: 1e-8
  lr: 1.0e-5
  weight_decay: 0
  # min_lr: 1.0e-5

lr_scheduler:
  lr_decay_style: cosine
  min_lr: 1.0e-6

# Uncomment and configure for W&B logging
# wandb:
# project: <your_wandb_project>
# entity: <your_wandb_entity>
# name: <your_wandb_exp_name>
# save_dir: <your_wandb_save_dir>
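
For reference, every block in this recipe that carries a _target_ key names a dotted import path plus the keyword arguments passed to it. The sketch below shows roughly how such a block becomes a live object; the instantiate helper here is illustrative only, not the actual resolver used by examples/llm_finetune/finetune.py, which may differ in detail.

# Illustrative resolver for "_target_"-style config blocks (assumption: the real
# logic inside nemo_automodel / finetune.py is more complete, e.g. it also handles
# targets like Class.from_pretrained that need an extra attribute walk).
import importlib
import yaml

def instantiate(block, **extra):
    # Import the dotted path named by "_target_" and call it with the remaining keys.
    cfg = dict(block)
    module_path, _, attr = cfg.pop("_target_").rpartition(".")
    target = getattr(importlib.import_module(module_path), attr)
    return target(**cfg, **extra)

with open("examples/llm_finetune/devstral/devstral2_small_2512_squad.yaml") as f:
    cfg = yaml.safe_load(f)
# e.g. build the optimizer for an already-constructed model:
# optimizer = instantiate(cfg["optimizer"], params=model.parameters())
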
108 changes: 108 additions & 0 deletions examples/llm_finetune/devstral/devstral2_small_2512_squad_peft.yaml
@@ -0,0 +1,108 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# To run this recipe, please use the following command:
# torchrun --nproc-per-node=8 examples/llm_finetune/finetune.py --config examples/llm_finetune/devstral/devstral2_small_2512_squad_peft.yaml
# Adjust --nproc-per-node to the number of GPUs available on your host machine.


step_scheduler:
  global_batch_size: 64
  local_batch_size: 1
  ckpt_every_steps: 200
  val_every_steps: 100 # run validation every this many gradient steps
  num_epochs: 1

dist_env:
  backend: nccl
  timeout_minutes: 1

rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 1111
  ranked: true

model:
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  pretrained_model_name_or_path: akoumpa/Devstral-Small-2-24B-Instruct-2512-BF16

peft:
  _target_: nemo_automodel.components._peft.lora.PeftConfig
  match_all_linear: True
  dim: 8
  alpha: 32
  use_triton: True
  # dtype needs a fix to resolve to a type instead of a string
  # lora_dtype: torch.bfloat16

checkpoint:
  enabled: true
  checkpoint_dir: checkpoints/
  model_save_format: torch_save # torch_save or safetensors
  save_consolidated: false # saves the model in a consolidated safetensors format. Requires model_save_format to be safetensors.

distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1 # dp_shard_size = dp_size / dp_replicate_size, with dp_shard_size < dp_size. For the DDP use case, use DDPManager.
  tp_size: 1
  cp_size: 1
  sequence_parallel: false

loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy

dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: train

packed_sequence:
  # Set packed_sequence_size > 0 to run with packed sequences
  packed_sequence_size: 0

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: true


validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: validation
  limit_dataset_samples: 64

validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater

optimizer:
  _target_: torch.optim.Adam
  betas: [0.9, 0.999]
  eps: 1e-8
  lr: 1.0e-5
  weight_decay: 0
  # min_lr: 1.0e-5

lr_scheduler:
  lr_decay_style: cosine
  min_lr: 1.0e-6

# Uncomment and configure for W&B logging
# wandb:
# project: <your_wandb_project>
# entity: <your_wandb_entity>
# name: <your_wandb_exp_name>
# save_dir: <your_wandb_save_dir>
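
The peft block in this recipe enables LoRA adapters on every linear layer (match_all_linear) with rank dim=8 and scaling alpha=32. Below is a minimal conceptual sketch of the standard LoRA formulation those two numbers parameterize; it is not the nemo_automodel.components._peft.lora implementation, and the Triton-fused path switched on by use_triton is not shown.

# Conceptual LoRA adapter: y = W x + (alpha / rank) * B(A(x)), with A and B low-rank.
# Sketch only -- the real PeftConfig wiring in nemo_automodel is not reproduced here.
import torch.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, base: nn.Linear, rank: int = 8, alpha: int = 32):
        super().__init__()
        self.base = base                                  # frozen pretrained projection
        self.base.weight.requires_grad_(False)
        self.lora_a = nn.Linear(base.in_features, rank, bias=False)
        self.lora_b = nn.Linear(rank, base.out_features, bias=False)
        nn.init.zeros_(self.lora_b.weight)                # adapter starts as a no-op
        self.scaling = alpha / rank

    def forward(self, x):
        return self.base(x) + self.scaling * self.lora_b(self.lora_a(x))
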
3 changes: 3 additions & 0 deletions nemo_automodel/__init__.py
@@ -33,15 +33,18 @@
        NeMoAutoModelForSequenceClassification,
        NeMoAutoModelForTextToWaveform,
    ) # noqa: I001
    from nemo_automodel._transformers.auto_tokenizer import NeMoAutoTokenizer

    globals()["NeMoAutoModelForCausalLM"] = NeMoAutoModelForCausalLM
    globals()["NeMoAutoModelForImageTextToText"] = NeMoAutoModelForImageTextToText
    globals()["NeMoAutoModelForSequenceClassification"] = NeMoAutoModelForSequenceClassification
    globals()["NeMoAutoModelForTextToWaveform"] = NeMoAutoModelForTextToWaveform
    globals()["NeMoAutoTokenizer"] = NeMoAutoTokenizer
    __all__.append("NeMoAutoModelForCausalLM")
    __all__.append("NeMoAutoModelForImageTextToText")
    __all__.append("NeMoAutoModelForSequenceClassification")
    __all__.append("NeMoAutoModelForTextToWaveform")
    __all__.append("NeMoAutoTokenizer")
except:
    # optional dependency might be missing,
    # leave the name off the module namespace so other imports still work
2 changes: 2 additions & 0 deletions nemo_automodel/_transformers/__init__.py
@@ -19,10 +19,12 @@
    NeMoAutoModelForSequenceClassification,
    NeMoAutoModelForTextToWaveform,
)
from nemo_automodel._transformers.auto_tokenizer import NeMoAutoTokenizer

__all__ = [
    "NeMoAutoModelForCausalLM",
    "NeMoAutoModelForImageTextToText",
    "NeMoAutoModelForSequenceClassification",
    "NeMoAutoModelForTextToWaveform",
    "NeMoAutoTokenizer",
]
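
With the re-exports above, NeMoAutoTokenizer becomes importable from the package root alongside the auto-model classes. A minimal usage sketch follows, assuming the class mirrors the familiar Hugging Face AutoTokenizer.from_pretrained interface; this diff only shows the export, not the class's API.

# Assumed usage -- the from_pretrained call on NeMoAutoTokenizer mirrors
# Hugging Face's AutoTokenizer and is not confirmed by this diff.
from nemo_automodel import NeMoAutoModelForCausalLM, NeMoAutoTokenizer

model_id = "akoumpa/Devstral-Small-2-24B-Instruct-2512-BF16"
# tokenizer = NeMoAutoTokenizer.from_pretrained(model_id)
# model = NeMoAutoModelForCausalLM.from_pretrained(model_id)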