
Commit 4fe74cd

ci: add QLoRA test (#1017)
Signed-off-by: Alexandros Koumparoulis <[email protected]>
1 parent b7031bb commit 4fe74cd

File tree: 4 files changed, +225 −0 lines changed
L2_HF_QLORA_Tiny.sh (new file)
Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
#!/bin/bash
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -xeuo pipefail  # Echo commands; exit on errors, unset variables, and pipeline failures

export PYTHONPATH=${PYTHONPATH:-}:$(pwd)
export CUDA_VISIBLE_DEVICES="0"

# NOTE: This functional test assumes the CI harness provides:
#   - $TEST_DATA_DIR (local tiny HF model fixtures)
#   - $HF_CACHE (local dataset fixtures)

TRANSFORMERS_OFFLINE=1 python -m torch.distributed.run \
    --master-port=29511 --nproc_per_node=1 --nnodes=1 \
    -m coverage run --data-file=/workspace/.coverage --source=/workspace \
    -m pytest tests/functional_tests/training/test_qlora_tiny.py \
    --config tests/functional_tests/hf_peft/qlora_tiny_squad.yaml \
    --model.pretrained_model_name_or_path $TEST_DATA_DIR/hf_mixtral_2l/ \
    --dataset.tokenizer.pretrained_model_name_or_path $TEST_DATA_DIR/hf_mixtral_2l/ \
    --validation_dataset.tokenizer.pretrained_model_name_or_path $TEST_DATA_DIR/hf_mixtral_2l/ \
    --dataset.dataset_name $HF_CACHE/squad/ \
    --validation_dataset.dataset_name $HF_CACHE/squad/ \
    --step_scheduler.max_steps 2 \
    --step_scheduler.global_batch_size 2 \
    --step_scheduler.local_batch_size 1 \
    --checkpoint.enabled true \
    --checkpoint.checkpoint_dir checkpoints/ \
    --checkpoint.model_save_format safetensors \
    --checkpoint.save_consolidated false
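The launcher overrides individual YAML keys with dotted CLI flags (for example --step_scheduler.max_steps 2). A minimal sketch of how such dotted overrides can be merged into a loaded config; the actual behavior of parse_args_and_load_config may differ:

# Sketch only: dotted-path CLI overrides merged into a YAML config.
# The real parse_args_and_load_config in nemo_automodel may behave differently.
import yaml

def apply_override(cfg: dict, dotted_key: str, value) -> None:
    """Set cfg['a']['b']['c'] = value for dotted_key 'a.b.c'."""
    node = cfg
    *parents, leaf = dotted_key.lstrip("-").split(".")
    for key in parents:
        node = node.setdefault(key, {})
    node[leaf] = value

with open("tests/functional_tests/hf_peft/qlora_tiny_squad.yaml") as f:
    cfg = yaml.safe_load(f)
apply_override(cfg, "step_scheduler.max_steps", 2)
apply_override(cfg, "checkpoint.model_save_format", "safetensors")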
tests/functional_tests/hf_peft/qlora_tiny_squad.yaml (new file)
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Tiny QLoRA smoke config used by functional tests.
# The launcher script overrides the model + dataset paths to use local test fixtures.

step_scheduler:
  global_batch_size: 2
  local_batch_size: 1
  ckpt_every_steps: 2
  val_every_steps: null
  max_steps: 2

dist_env:
  backend: nccl
  timeout_minutes: 5

rng:
  _target_: nemo_automodel.components.training.rng.StatefulRNG
  seed: 42
  ranked: true

model:
  _target_: nemo_automodel.NeMoAutoModelForCausalLM.from_pretrained
  # Overridden by the functional-test launcher to a tiny local HF model.
  pretrained_model_name_or_path: hf-internal-testing/tiny-random-gpt2

peft:
  _target_: nemo_automodel.components._peft.lora.PeftConfig
  match_all_linear: true
  dim: 4
  alpha: 8
  dropout: 0.0

distributed:
  _target_: nemo_automodel.components.distributed.fsdp2.FSDP2Manager
  dp_size: none
  dp_replicate_size: 1
  tp_size: 1
  cp_size: 1
  sequence_parallel: false

quantization:
  load_in_4bit: true
  load_in_8bit: false
  bnb_4bit_compute_dtype: bfloat16
  bnb_4bit_use_double_quant: true
  bnb_4bit_quant_type: nf4
  bnb_4bit_quant_storage: bfloat16

loss_fn:
  _target_: nemo_automodel.components.loss.masked_ce.MaskedCrossEntropy

dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: train
  limit_dataset_samples: 128

packed_sequence:
  packed_sequence_size: 0

dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater
  shuffle: false

validation_dataset:
  _target_: nemo_automodel.components.datasets.llm.squad.make_squad_dataset
  dataset_name: rajpurkar/squad
  split: validation
  limit_dataset_samples: 64

validation_dataloader:
  _target_: torchdata.stateful_dataloader.StatefulDataLoader
  collate_fn: nemo_automodel.components.datasets.utils.default_collater

optimizer:
  _target_: torch.optim.AdamW
  betas: [0.9, 0.999]
  eps: 1.0e-8
  lr: 1.0e-4
  weight_decay: 0.0

checkpoint:
  enabled: true
  checkpoint_dir: checkpoints/
  model_save_format: safetensors
  save_consolidated: false
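The quantization block uses the same field names as Hugging Face transformers' BitsAndBytesConfig. A minimal sketch of the equivalent object, assuming nemo_automodel forwards these fields to transformers/bitsandbytes (that plumbing is not part of this commit):

# Sketch only: the bitsandbytes setup described by the `quantization` block,
# expressed with transformers' BitsAndBytesConfig.
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    load_in_8bit=False,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_quant_storage=torch.bfloat16,
)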

tests/functional_tests/hf_peft/test_hf_peft.py

Lines changed: 8 additions & 0 deletions
@@ -20,6 +20,7 @@
 HF_PEFT_FSDP2_CHECKPOINT_QAT_FILENAME = "L2_HF_PEFT_FSDP2_Checkpoint_qat.sh"
 HF_PEFT_Triton_FSDP2_CHECKPOINT_FILENAME = "L2_HF_PEFT_Triton_FSDP2_Checkpoint.sh"
 HF_PEFT_VLM_FSDP2_CHECKPOINT_FILENAME = "L2_HF_PEFT_VLM_FSDP2_Checkpoint.sh"
+HF_QLORA_TINY_FILENAME = "L2_HF_QLORA_Tiny.sh"

 class TestHFPEFT:
     def test_hf_peft_fsdp2_checkpoint(self):
@@ -45,6 +46,13 @@ def test_hf_peft_triton_fsdp2_checkpoint(self):
     def test_hf_peft_vlm_fsdp2_checkpoint(self):
         try:
             run_test_script(TEST_FOLDER, HF_PEFT_VLM_FSDP2_CHECKPOINT_FILENAME)
+        finally:
+            # remove the checkpoint directory
+            shutil.rmtree("checkpoints/", ignore_errors=True)
+
+    def test_hf_qlora_tiny(self):
+        try:
+            run_test_script(TEST_FOLDER, HF_QLORA_TINY_FILENAME)
         finally:
             # remove the checkpoint directory
             shutil.rmtree("checkpoints/", ignore_errors=True)
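run_test_script is an existing helper in this test module and is not shown in the diff. A hypothetical stand-in, only to illustrate how the pytest case drives the new shell launcher:

# Hypothetical stand-in for the repo's run_test_script helper (not part of this
# commit); illustrates how a pytest case can invoke L2_HF_QLORA_Tiny.sh.
import subprocess
from pathlib import Path

def run_test_script(test_folder: str, filename: str) -> None:
    script = Path(test_folder) / filename
    subprocess.run(["bash", str(script)], check=True)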
tests/functional_tests/training/test_qlora_tiny.py (new file)
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tiny QLoRA smoke test (4-bit + LoRA) for the functional test suite."""

from __future__ import annotations

import sys

import datasets
import pytest
import torch

from nemo_automodel.components.config._arg_parser import parse_args_and_load_config
from nemo_automodel.components.quantization.qlora import verify_qlora_quantization
from nemo_automodel.recipes.llm.train_ft import TrainFinetuneRecipeForNextTokenPrediction

datasets.disable_caching()


def _get_cfg_path() -> str:
    argv = sys.argv[1:]
    for i, tok in enumerate(argv):
        if tok in ("--config", "-c"):
            if i + 1 >= len(argv):
                raise ValueError("Expected a path after --config")
            return argv[i + 1]
    raise ValueError("Expected --config/-c to be provided by the functional-test launcher")


@pytest.mark.skipif(not torch.cuda.is_available(), reason="QLoRA functional test requires CUDA")
def test_qlora_tiny_smoke():
    """
    End-to-end smoke test:
      - load a tiny HF model in 4-bit (bitsandbytes)
      - apply LoRA adapters
      - run a couple of training steps
      - assert: model is quantized and LoRA params are trainable
    """
    pytest.importorskip("bitsandbytes")

    cfg = parse_args_and_load_config(_get_cfg_path())
    trainer = TrainFinetuneRecipeForNextTokenPrediction(cfg)
    trainer.setup()

    # Single-stage model in this config
    model = trainer.model_parts[0]

    is_quantized = bool(getattr(model, "is_loaded_in_4bit", False)) or bool(
        getattr(getattr(model, "config", None), "quantization_config", None)
    )
    is_quantized = is_quantized or verify_qlora_quantization(model)
    assert is_quantized, "Expected 4-bit quantization to be active for QLoRA"

    trainable = [n for n, p in model.named_parameters() if p.requires_grad]
    assert any("lora" in n.lower() for n in trainable), f"Expected LoRA trainable params, got: {trainable[:20]}"

    # Run a very short training loop (max_steps is controlled by the config/CLI overrides)
    trainer.run_train_validation_loop()
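verify_qlora_quantization comes from nemo_automodel.components.quantization.qlora and its implementation is not part of this diff. A minimal sketch of one way such a check could work, assuming bitsandbytes replaces nn.Linear layers with Linear4bit when load_in_4bit is set:

# Sketch only: detect 4-bit layers in the spirit of verify_qlora_quantization
# (whose actual implementation is not shown in this commit).
import bitsandbytes as bnb
import torch.nn as nn

def has_4bit_layers(model: nn.Module) -> bool:
    # bitsandbytes swaps nn.Linear for Linear4bit when loading in 4-bit
    return any(isinstance(module, bnb.nn.Linear4bit) for module in model.modules())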

0 commit comments