simplify from_pretrained/from_config

akoumpa · akoumpa · commit d22ea6a41336 · 2025-12-16T07:18:30.000-08:00
Signed-off-by: Alexandros Koumparoulis &lt;akoumparouli@nvidia.com&gt;
diff --git a/nemo_automodel/_transformers/auto_model.py b/nemo_automodel/_transformers/auto_model.py
@@ -23,16 +23,6 @@
 import torch
 import torch.distributed as dist
 from torch.nn.attention import SDPBackend, sdpa_kernel
-from transformers import (
-    AutoConfig,
-    AutoModelForCausalLM,
-    AutoModelForImageTextToText,
-    AutoModelForSequenceClassification,
-    AutoModelForTextToWaveform,
-    PreTrainedModel,
-)
-from transformers.modeling_utils import _get_resolved_checkpoint_files
-from transformers.models.auto.auto_factory import _BaseAutoModelClass
 
 from nemo_automodel import __version__
 from nemo_automodel._transformers.registry import ModelRegistry
@@ -41,9 +31,20 @@
     get_local_world_size_preinit,
     get_world_size_safe,
 )
+from nemo_automodel.components.distributed.utils import FirstRankPerNode
 from nemo_automodel.components.utils.model_utils import resolve_trust_remote_code
 from nemo_automodel.shared.import_utils import safe_import
 from nemo_automodel.shared.utils import dtype_from_str
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoModelForImageTextToText,
+    AutoModelForSequenceClassification,
+    AutoModelForTextToWaveform,
+    PreTrainedModel,
+)
+from transformers.modeling_utils import _get_resolved_checkpoint_files
+from transformers.models.auto.auto_factory import _BaseAutoModelClass
 
 HAS_LIGER_KERNEL, liger_kernel_trf = safe_import("liger_kernel.transformers")
 logger = logging.getLogger(__name__)
@@ -216,6 +217,38 @@ def _verify_sdpa_support(model, is_hf_model, cp_size):
             raise ValueError("Model does not support SDPA required for context parallelism")
 
 
+def _download_model_weights(hf_config, pretrained_model_name_or_path):
+    if (not dist.is_initialized() or get_local_rank_preinit() == 0) and not os.path.isdir(
+        pretrained_model_name_or_path
+    ):
+        num_nodes = (get_world_size_safe() % get_local_world_size_preinit()) + 1  # 1-indexed
+        if num_nodes > 1:
+            logging.info(
+                f"""Downloading model weights on {num_nodes} nodes. This incurs high storage usage. 
+                It is recommended to download once with `hf download` and pass in the downloaded path to the `pretrained_model_name_or_path` argument."""
+            )
+        with FirstRankPerNode():
+            _get_resolved_checkpoint_files(
+                pretrained_model_name_or_path=pretrained_model_name_or_path,
+                subfolder="",
+                variant=None,
+                gguf_file=None,
+                from_tf=False,
+                from_flax=False,
+                use_safetensors=None,
+                cache_dir=None,
+                force_download=False,
+                proxies=None,
+                local_files_only=False,
+                token=None,
+                user_agent={"file_type": "model", "framework": "pytorch", "from_auto_class": False},
+                revision="main",
+                commit_hash=getattr(hf_config, "_commit_hash", None),
+                is_remote_code=False,
+                transformers_explicit_filename=None,
+            )
+
+
 class _BaseNeMoAutoModelClass(_BaseAutoModelClass):
     """
     Drop-in replacement for ``_BaseAutoModelClass`` that includes custom-kernels.
@@ -238,6 +271,24 @@ class _BaseNeMoAutoModelClass(_BaseAutoModelClass):
       Liger patch.  Unsupported models will silently fall back.
     """
 
+    @classmethod
+    def _from_pretrained_parent_class(cls, *args, **kwargs):
+        name = cls.__name__
+        if name.startswith("NeMo"):
+            cls.__name__ = name[4:]
+        model = super().from_pretrained(*args, **kwargs)
+        cls.__name__ = name
+        return model
+
+    @classmethod
+    def _from_config_parent_class(cls, *args, **kwargs):
+        name = cls.__name__
+        if name.startswith("NeMo"):
+            cls.__name__ = name[4:]
+        model = super().from_config(*args, **kwargs)
+        cls.__name__ = name
+        return model
+
     @classmethod
     def from_pretrained(
         cls,
@@ -324,66 +375,36 @@ def _retry(**override):
                 **kwargs,
             )
 
-        # load model
+        # 1. if force_hf is True, we will use the parent class to load and return the model as is
+        if force_hf:
+            return _BaseNeMoAutoModelClass._from_pretrained_parent_class(
+                pretrained_model_name_or_path,
+                *model_args,
+                torch_dtype=torch_dtype,
+                attn_implementation=attn_implementation,
+                **kwargs,
+            )
+
+        # 2. If we have a custom model implementation available, we prioritize that over HF
+        if hf_config.architectures[0] in ModelRegistry.model_arch_name_to_cls:
+            # if we are able to init the custom model, we will now download the model weights on local rank 0
+            _download_model_weights(hf_config, pretrained_model_name_or_path)
+            logger.info(f"Using custom model implementation for {hf_config.architectures[0]}")
+            kwargs.pop("trust_remote_code", None)
+            return ModelRegistry.model_arch_name_to_cls[hf_config.architectures[0]](hf_config, *model_args, **kwargs)
+
+        # 3. fallback to parent class
         model = None
         try:
-            name = cls.__name__
-            if name.startswith("NeMo"):
-                cls.__name__ = name[4:]
-            if not force_hf:
-                try:
-                    # if we have a custom model implementation available, we prioritize that over HF
-                    if hf_config.architectures[0] in ModelRegistry.model_arch_name_to_cls:
-                        kwargs.pop("trust_remote_code", None)
-                        model = ModelRegistry.model_arch_name_to_cls[hf_config.architectures[0]](
-                            hf_config, *model_args, **kwargs
-                        )
-                        # if we are able to init the custom model, we will now download the model weights on local rank 0
-                        if (not dist.is_initialized() or get_local_rank_preinit() == 0) and not os.path.isdir(
-                            pretrained_model_name_or_path
-                        ):
-                            num_nodes = (get_world_size_safe() % get_local_world_size_preinit()) + 1  # 1-indexed
-                            if num_nodes > 1:
-                                logging.info(
-                                    f"""Downloading model weights on {num_nodes} nodes. This incurs high storage usage. 
-                                    It is recommended to download once with `hf download` and pass in the downloaded path to the `pretrained_model_name_or_path` argument."""
-                                )
-                            _get_resolved_checkpoint_files(
-                                pretrained_model_name_or_path=pretrained_model_name_or_path,
-                                subfolder="",
-                                variant=None,
-                                gguf_file=None,
-                                from_tf=False,
-                                from_flax=False,
-                                use_safetensors=None,
-                                cache_dir=None,
-                                force_download=False,
-                                proxies=None,
-                                local_files_only=False,
-                                token=None,
-                                user_agent={"file_type": "model", "framework": "pytorch", "from_auto_class": False},
-                                revision="main",
-                                commit_hash=getattr(hf_config, "_commit_hash", None),
-                                is_remote_code=False,
-                                transformers_explicit_filename=None,
-                            )
-                        if dist.is_initialized():
-                            dist.barrier()
-                        logger.info(f"Using custom model implementation for {hf_config.architectures[0]}")
-                        return model
-                except Exception as e:
-                    logger.error(f"Failed to use custom model implementation with error: {e}")
-
             if quantization_config is not None:
                 kwargs["quantization_config"] = quantization_config
-            model = super().from_pretrained(
+            model = _BaseNeMoAutoModelClass._from_pretrained_parent_class(
                 pretrained_model_name_or_path,
                 *model_args,
                 torch_dtype=torch_dtype,
                 attn_implementation=attn_implementation,
                 **kwargs,
             )
-            cls.__name__ = name
         except ValueError as e:
             if "does not support" in str(e):
                 if model is not None:
@@ -499,35 +520,32 @@ def _retry(**override):
                 **kwargs,
             )
 
-        # load model
+        # 1. if force_hf is True, we will use the parent class to load and return the model as is
+        if force_hf:
+            return _BaseNeMoAutoModelClass._from_config_parent_class(
+                config,
+                *model_args,
+                torch_dtype=torch_dtype,
+                attn_implementation=attn_implementation,
+                **kwargs,
+            )
+
+        # 2. If we have a custom model implementation available, we prioritize that over HF
+        if config.architectures[0] in ModelRegistry.model_arch_name_to_cls:
+            raise NotImplementedError("Custom model implementation is not supported for from_config")
+
+        # 3. fallback to parent class
         model = None
         try:
-            name = cls.__name__
-            if name.startswith("NeMo"):
-                cls.__name__ = name[4:]
-            if not force_hf:
-                try:
-                    # if we have a custom model implementation available, we prioritize that over HF
-                    if config.architectures[0] in ModelRegistry.model_arch_name_to_cls:
-                        kwargs.pop("trust_remote_code", None)
-                        model = ModelRegistry.model_arch_name_to_cls[config.architectures[0]](
-                            config, *model_args, **kwargs
-                        )
-                        logger.info(f"Using custom model implementation for {config.architectures[0]}")
-                        return model
-                except Exception as e:
-                    logger.error(f"Failed to use custom model implementation with error: {e}")
-
             if quantization_config is not None:
                 kwargs["quantization_config"] = quantization_config
-            model = super().from_config(
+            model = _BaseNeMoAutoModelClass._from_config_parent_class(
                 config,
                 *model_args,
-                attn_implementation=attn_implementation,
                 torch_dtype=torch_dtype,
+                attn_implementation=attn_implementation,
                 **kwargs,
             )
-            cls.__name__ = name
         except ValueError as e:
             if "does not support" in str(e):
                 logging.warning("Falling back to eager attention.")