
Commit 1ab6a76

unit tests
Signed-off-by: adil-a <adil.asif2000@hotmail.com>
1 parent 472f604 commit 1ab6a76

File tree: 8 files changed (+66, -855 lines)


nemo_automodel/_diffusers/auto_diffusion_pipeline.py

Lines changed: 16 additions & 35 deletions
```diff
@@ -14,47 +14,23 @@
 
 import logging
 import os
-import sys
-import types
 from typing import Any, Dict, Iterable, Optional, Tuple
 
 import torch
 import torch.nn as nn
 
-# diffusers is an optional dependency. Some CI environments may have it missing
-# or installed with incompatible transitive deps. Import defensively so that
-# helper functions in this module (and unit tests) can run without diffusers.
-from nemo_automodel.shared.import_utils import safe_import
-
-try:  # pragma: no cover - exercised indirectly via unit tests
-    ok, diffusers = safe_import("diffusers")
-except Exception:
-    # diffusers can fail with non-ImportError exceptions (e.g. missing optional deps).
-    ok, diffusers = False, None
-
-if ok and hasattr(diffusers, "DiffusionPipeline"):
-    DiffusionPipeline = diffusers.DiffusionPipeline
-else:  # pragma: no cover
-    # Provide a minimal stub module/class so tests can patch
-    # `diffusers.DiffusionPipeline.from_pretrained` even when diffusers fails to import.
-    diffusers_stub = sys.modules.get("diffusers")
-    if diffusers_stub is None:
-        diffusers_stub = types.ModuleType("diffusers")
-        sys.modules["diffusers"] = diffusers_stub
-
-    class DiffusionPipeline:  # type: ignore[no-redef]
-        @classmethod
-        def from_pretrained(cls, *args, **kwargs):
-            raise RuntimeError(
-                "diffusers is required for NeMoAutoDiffusionPipeline.from_pretrained. "
-                "Install a compatible diffusers + deps stack to use this feature."
-            )
-
-    setattr(diffusers_stub, "DiffusionPipeline", DiffusionPipeline)
-
 from nemo_automodel.components.distributed.fsdp2 import FSDP2Manager
 from nemo_automodel.shared.utils import dtype_from_str
 
+# diffusers is an optional dependency
+try:
+    from diffusers import DiffusionPipeline
+
+    DIFFUSERS_AVAILABLE = True
+except Exception:
+    DIFFUSERS_AVAILABLE = False
+    DiffusionPipeline = object
+
 logger = logging.getLogger(__name__)
 
 
@@ -123,8 +99,13 @@ def from_pretrained(
         torch_dtype: Any = "auto",
         move_to_device: bool = True,
         **kwargs,
-    ) -> DiffusionPipeline:
-        pipe: DiffusionPipeline = DiffusionPipeline.from_pretrained(
+    ) -> "DiffusionPipeline":
+        if not DIFFUSERS_AVAILABLE:
+            raise RuntimeError(
+                "diffusers is required for NeMoAutoDiffusionPipeline.from_pretrained. "
+                "Install diffusers with a compatible version."
+            )
+        pipe = DiffusionPipeline.from_pretrained(
             pretrained_model_name_or_path,
             *model_args,
             torch_dtype=torch_dtype,
```
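
Note: the module now imports diffusers behind a simple availability flag and raises only when `from_pretrained` is actually called. A minimal standalone sketch of the same guard pattern; the `load_pipeline` helper is hypothetical and not part of this commit:

```python
# Optional-dependency guard: import once at module load, fail loudly only when used.
try:
    from diffusers import DiffusionPipeline

    DIFFUSERS_AVAILABLE = True
except Exception:  # diffusers may be absent or carry incompatible transitive deps
    DIFFUSERS_AVAILABLE = False
    DiffusionPipeline = object  # placeholder so annotations and patching still resolve


def load_pipeline(model_id: str, **kwargs):
    """Hypothetical helper mirroring the guard added to from_pretrained."""
    if not DIFFUSERS_AVAILABLE:
        raise RuntimeError(
            "diffusers is required for this feature. "
            "Install diffusers with a compatible version."
        )
    # Only reached when the real diffusers class was imported above.
    return DiffusionPipeline.from_pretrained(model_id, **kwargs)
```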

nemo_automodel/components/models/mistral3/model.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -144,7 +144,7 @@ def __init__(
             bos_token_id=bos_token_id,
             eos_token_id=eos_token_id,
             tie_word_embeddings=tie_word_embeddings,
-            ignore_keys_at_rope_validation=["llama_4_scaling_beta"],
+            ignore_keys_at_rope_validation={"llama_4_scaling_beta"},
             **kwargs,
         )
 
@@ -195,7 +195,10 @@ def __init__(self, config: Ministral3Config, device=None):
 
         self.config = config
 
-        self.rope_type = self.config.rope_parameters["type"]
+        # Support both transformers v4 ("type") and v5 ("rope_type") key names
+        self.rope_type = self.config.rope_parameters.get("rope_type") or self.config.rope_parameters.get(
+            "type", "default"
+        )
         rope_init_fn = self.compute_default_rope_parameters
         if self.rope_type != "default":
            rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
```
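
Note: the `.get("rope_type") or .get("type", "default")` chain resolves whichever key the installed transformers version writes into `rope_parameters`. A quick sketch of how the fallback behaves for the two dict shapes; the example dicts are illustrative and not taken from an actual Ministral3 config:

```python
def resolve_rope_type(rope_parameters: dict) -> str:
    # Prefer the newer "rope_type" key, fall back to the legacy "type" key,
    # and use "default" when neither is present.
    return rope_parameters.get("rope_type") or rope_parameters.get("type", "default")


# Illustrative inputs only:
assert resolve_rope_type({"rope_type": "llama3", "factor": 8.0}) == "llama3"  # newer key
assert resolve_rope_type({"type": "yarn"}) == "yarn"                          # legacy key
assert resolve_rope_type({}) == "default"                                     # neither key
```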

nemo_automodel/components/models/nemotron_parse/model.py

Lines changed: 5 additions & 19 deletions
```diff
@@ -248,8 +248,6 @@ def forward(
         attention_mask: Optional[torch.Tensor] = None,
         encoder_hidden_states: Optional[torch.FloatTensor] = None,
         encoder_attention_mask: Optional[torch.LongTensor] = None,
-        head_mask: Optional[torch.Tensor] = None,
-        cross_attn_head_mask: Optional[torch.Tensor] = None,
         past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
         inputs_embeds: Optional[torch.FloatTensor] = None,
         use_cache: Optional[bool] = None,
@@ -282,7 +280,7 @@ def forward(
 
         if self.config._attn_implementation == "flash_attention_2":
             attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
-        elif self.config._attn_implementation == "sdpa" and not output_attentions and cross_attn_head_mask is None:
+        elif self.config._attn_implementation == "sdpa" and not output_attentions:
             attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
                 attention_mask, input_shape, inputs_embeds, past_key_values_length
             )
@@ -294,7 +292,7 @@ def forward(
         if encoder_hidden_states is not None and encoder_attention_mask is not None:
             if self.config._attn_implementation == "flash_attention_2":
                 encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None
-            elif self.config._attn_implementation == "sdpa" and cross_attn_head_mask is None and not output_attentions:
+            elif self.config._attn_implementation == "sdpa" and not output_attentions:
                 encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa(
                     encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
                 )
@@ -310,12 +308,6 @@ def forward(
         all_self_attns = () if output_attentions else None
         all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None
 
-        for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
-            if attn_mask is not None and attn_mask.size()[0] != len(self.layers):
-                raise ValueError(
-                    f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {attn_mask.size()[0]}."
-                )
-
         for idx, decoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
@@ -331,23 +323,17 @@ def forward(
                     attention_mask,
                     encoder_hidden_states,
                     encoder_attention_mask,
-                    head_mask[idx] if head_mask is not None else None,
-                    cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None,
-                    None,
+                    None,  # past_key_values
                     output_attentions,
-                    False,
+                    False,  # use_cache
                 )
             else:
                 layer_outputs = decoder_layer(
                     hidden_states,
                     attention_mask=attention_mask,
                     encoder_hidden_states=encoder_hidden_states,
                     encoder_attention_mask=encoder_attention_mask,
-                    layer_head_mask=(head_mask[idx] if head_mask is not None else None),
-                    cross_attn_layer_head_mask=(
-                        cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None
-                    ),
-                    past_key_value=None,
+                    past_key_values=None,
                     output_attentions=output_attentions,
                     use_cache=False,
                 )
```

nemo_automodel/components/utils/model_utils.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -324,9 +324,11 @@ def register_empty_parameter(module, name, param):
     # (e.g., TransformerEngine sets tensor_model_parallel on weights)
     if param_cls is nn.Parameter:
         kwargs = {"requires_grad": param.requires_grad}
+        is_hf_initialized = None
     else:
         kwargs = module._parameters[name].__dict__.copy()
         kwargs["requires_grad"] = param.requires_grad
+        is_hf_initialized = kwargs.pop("_is_hf_initialized", None)
     module._parameters[name] = param_cls(module._parameters[name].to(device), **kwargs)
     if is_hf_initialized is not None:
         setattr(module._parameters[name], "_is_hf_initialized", is_hf_initialized)
```
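
Note: the two added lines carry `_is_hf_initialized` across the parameter re-construction. The flag lives in the tensor's `__dict__`, so if left in `kwargs` it would be forwarded as a constructor keyword that a custom parameter class may not accept; popping it first and restoring it as a plain attribute avoids that. A small sketch of the pattern with a toy parameter subclass; `TaggedParameter` is invented for illustration, and the real code calls `.to(device)` on the parameter itself:

```python
import torch
import torch.nn as nn


# Toy parameter subclass standing in for custom parameter classes whose
# constructors only accept specific keyword arguments.
class TaggedParameter(nn.Parameter):
    def __new__(cls, data, requires_grad=True):
        return super().__new__(cls, data, requires_grad)


p = TaggedParameter(torch.zeros(2))
p._is_hf_initialized = True             # stored in the tensor's __dict__, not a ctor kwarg

kwargs = p.__dict__.copy()              # -> {"_is_hf_initialized": True}
kwargs["requires_grad"] = p.requires_grad

# Pop the flag before re-constructing, then restore it as a plain attribute,
# mirroring the two lines added to register_empty_parameter.
is_hf_initialized = kwargs.pop("_is_hf_initialized", None)
new_p = TaggedParameter(p.data.to("cpu"), **kwargs)
if is_hf_initialized is not None:
    new_p._is_hf_initialized = is_hf_initialized

assert new_p._is_hf_initialized is True
```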

tests/unit_tests/_diffusers/test_auto_diffusion_pipeline.py

Lines changed: 29 additions & 5 deletions
```diff
@@ -19,6 +19,14 @@
 import pytest
 import torch
 
+# Check if diffusers can be imported properly (may fail due to peft/transformers incompatibility)
+try:
+    from nemo_automodel._diffusers.auto_diffusion_pipeline import _choose_device
+    DIFFUSERS_AVAILABLE = True
+except Exception:
+    DIFFUSERS_AVAILABLE = False
+
+pytestmark = pytest.mark.skipif(not DIFFUSERS_AVAILABLE, reason="diffusers not available or incompatible with current transformers version")
 
 MODULE_PATH = "nemo_automodel._diffusers.auto_diffusion_pipeline"
 
@@ -128,16 +136,20 @@ def test_from_pretrained_basic_flow_moves_modules_and_returns_pipeline(caplog):
     m1, m2 = DummyModule(), DummyModule()
     dummy_pipe = DummyPipeline({"unet": m1, "text_encoder": m2})
 
+    mock_diffusion_pipeline = MagicMock()
+    mock_diffusion_pipeline.from_pretrained.return_value = dummy_pipe
+
     with (
-        patch("diffusers.DiffusionPipeline.from_pretrained", return_value=dummy_pipe) as mock_hf_from,
+        patch(f"{MODULE_PATH}.DIFFUSERS_AVAILABLE", True),
+        patch(f"{MODULE_PATH}.DiffusionPipeline", mock_diffusion_pipeline),
         patch.object(torch.nn.Module, "to") as mock_to,
         patch(f"{MODULE_PATH}.torch.cuda.is_available", return_value=False),
     ):
         caplog.set_level(logging.WARNING)
         out = NeMoAutoDiffusionPipeline.from_pretrained("dummy")
 
     assert out is dummy_pipe
-    assert mock_hf_from.call_count == 1
+    assert mock_diffusion_pipeline.from_pretrained.call_count == 1
     # Both modules should be moved to device once
     assert mock_to.call_count == 2
 
@@ -146,8 +158,12 @@ def test_from_pretrained_skips_move_when_flag_false():
     from nemo_automodel._diffusers.auto_diffusion_pipeline import NeMoAutoDiffusionPipeline
 
     dummy_pipe = DummyPipeline({"unet": DummyModule()})
+    mock_diffusion_pipeline = MagicMock()
+    mock_diffusion_pipeline.from_pretrained.return_value = dummy_pipe
+
     with (
-        patch("diffusers.DiffusionPipeline.from_pretrained", return_value=dummy_pipe),
+        patch(f"{MODULE_PATH}.DIFFUSERS_AVAILABLE", True),
+        patch(f"{MODULE_PATH}.DiffusionPipeline", mock_diffusion_pipeline),
         patch.object(torch.nn.Module, "to") as mock_to,
     ):
         out = NeMoAutoDiffusionPipeline.from_pretrained("dummy", move_to_device=False)
@@ -172,8 +188,12 @@ def test_from_pretrained_parallel_scheme_applies_managers_and_sets_attrs():
 
     parallel_scheme = {"unet": mgr_unet, "text_encoder": mgr_text}
 
+    mock_diffusion_pipeline = MagicMock()
+    mock_diffusion_pipeline.from_pretrained.return_value = dummy_pipe
+
     with (
-        patch("diffusers.DiffusionPipeline.from_pretrained", return_value=dummy_pipe),
+        patch(f"{MODULE_PATH}.DIFFUSERS_AVAILABLE", True),
+        patch(f"{MODULE_PATH}.DiffusionPipeline", mock_diffusion_pipeline),
         patch(f"{MODULE_PATH}.torch.distributed.is_initialized", return_value=True),
     ):
         out = NeMoAutoDiffusionPipeline.from_pretrained("dummy", parallel_scheme=parallel_scheme, move_to_device=False)
@@ -196,8 +216,12 @@ def test_from_pretrained_parallel_scheme_logs_and_continues_on_errors(caplog):
     mgr = Mock()
     mgr.parallelize.side_effect = RuntimeError("boom")
 
+    mock_diffusion_pipeline = MagicMock()
+    mock_diffusion_pipeline.from_pretrained.return_value = dummy_pipe
+
     with (
-        patch("diffusers.DiffusionPipeline.from_pretrained", return_value=dummy_pipe),
+        patch(f"{MODULE_PATH}.DIFFUSERS_AVAILABLE", True),
+        patch(f"{MODULE_PATH}.DiffusionPipeline", mock_diffusion_pipeline),
         patch(f"{MODULE_PATH}.torch.distributed.is_initialized", return_value=True),
         caplog.at_level(logging.WARNING),
     ):
```
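
Note: the tests now patch `DIFFUSERS_AVAILABLE` and `DiffusionPipeline` on the module under test rather than reaching into `diffusers` itself, so they run even when diffusers cannot be imported. A self-contained sketch of that patching style, using an in-memory stand-in module; all names here are invented for the example:

```python
import sys
import types
from unittest.mock import MagicMock, patch

# Build a tiny stand-in for the module under test so the example runs on its own.
fake_pipeline = types.ModuleType("fake_pipeline")
fake_pipeline.DIFFUSERS_AVAILABLE = False   # as if diffusers failed to import
fake_pipeline.DiffusionPipeline = object    # placeholder, mirroring the real module


def _load(name):
    # Reads the module-level names, as NeMoAutoDiffusionPipeline.from_pretrained does.
    if not fake_pipeline.DIFFUSERS_AVAILABLE:
        raise RuntimeError("diffusers is required")
    return fake_pipeline.DiffusionPipeline.from_pretrained(name)


fake_pipeline.load = _load
sys.modules["fake_pipeline"] = fake_pipeline


def test_load_uses_patched_symbols():
    mock_cls = MagicMock()
    mock_cls.from_pretrained.return_value = "fake-pipe"
    # Patch the names where they are looked up; no real diffusers install is needed.
    with (
        patch("fake_pipeline.DIFFUSERS_AVAILABLE", True),
        patch("fake_pipeline.DiffusionPipeline", mock_cls),
    ):
        assert fake_pipeline.load("dummy") == "fake-pipe"
    assert mock_cls.from_pretrained.call_count == 1


test_load_uses_patched_symbols()  # runs as a script; pytest would also collect it
```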
