Commit 4911f9c

Bump min transformers version from 4.48 to 4.53 (#558)
- CI tests for the minimum transformers version (4.48) started failing after the new peft 0.18 release yesterday, blocking all new PRs from merging.
- Transformers v5.0 is right around the corner, so we should not recommend too old a version.
- Set the new minimum transformers version to 4.53 instead of 4.48.
- The nemo:25.07+ and trtllm:1.0.0+ docker containers already ship transformers 4.53+.
- ModelOpt will likely still work with transformers 4.48-4.52 for now, but those versions will no longer be tested in CI/CD.

Signed-off-by: Keval Morabia <[email protected]>
1 parent: 7b22e83

9 files changed (+16, −27 lines)

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions

```diff
@@ -6,8 +6,14 @@ Model Optimizer Changelog (Linux)
 **Deprecations**
 
 **New Features**
+
 - Add support for PyTorch Geometric quantization.
 
+**Misc**
+
+- Bump minimum recommended transformers version to 4.53.
+
+
 0.40 (2025-12-xx)
 ^^^^^^^^^^^^^^^^^
 
```

modelopt/torch/__init__.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -32,7 +32,7 @@
 try:
     from transformers import __version__ as _transformers_version
 
-    if not (_Version("4.48") <= _Version(_transformers_version) < _Version("5.0")):
+    if not (_Version("4.53") <= _Version(_transformers_version) < _Version("5.0")):
         _warnings.warn(
             f"transformers version {_transformers_version} is not tested with nvidia-modelopt and may cause issues. "
             "Please install recommended version with `pip install nvidia-modelopt[hf]` if working with HF models.",
```

modelopt/torch/opt/plugins/transformers.py

Lines changed: 0 additions & 9 deletions

```diff
@@ -15,12 +15,10 @@
 
 """ModelOpt plugin for enabling automatic save/restore of ModelOpt state for HuggingFace models."""
 
-import os
 import types
 from contextlib import contextmanager
 
 import torch
-import transformers
 from transformers import PreTrainedModel, Trainer, TrainerCallback
 from transformers import modeling_utils as tf_modeling_utils
 
@@ -64,13 +62,6 @@ def _undo_torch_init_override_by_transformers():
 
 def _new_from_pretrained(cls, /, pretrained_model_name_or_path, *args, **kwargs):
     """Patch for `cls.from_pretrained` method to restore ModelOpt state."""
-    if kwargs.get("tp_plan") is not None or (
-        kwargs.get("device_map") == "auto" and os.environ.get("WORLD_SIZE")
-    ):
-        assert transformers.__version__ >= "4.52.0", (
-            "Tensor parallelism with ModelOpt requires transformers >= 4.52.0"
-        )
-
     with _patch_model_init_for_modelopt(
         cls, pretrained_model_name_or_path, extra_context=_undo_torch_init_override_by_transformers
    ):
```
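
Worth noting: the deleted assert compared raw version strings (`transformers.__version__ >= "4.52.0"`), which is lexicographic rather than numeric, and it became redundant once the package floor rose past 4.52. A small plain-Python illustration of why string comparison is fragile for version gates:

```python
from packaging.version import Version

# Lexicographic string comparison misorders multi-digit components:
print("4.9.0" >= "4.52.0")                    # True  ("9" > "5" as characters)
print(Version("4.9.0") >= Version("4.52.0"))  # False (4.9 actually predates 4.52)
```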

modelopt/torch/quantization/plugins/huggingface.py

Lines changed: 0 additions & 2 deletions

```diff
@@ -124,8 +124,6 @@ def is_compatible_attention(attn):
     # In addition, the new attention interface is not available for some models such as T5
     # Hence lets do a crude check here to see if the attention module is using the new_attention_interface
     # This is not foolproof but should work for most cases
-    if transformers.__version__ < "4.48.0":
-        return False
     module = inspect.getmodule(attn)
     return getattr(module, "ALL_ATTENTION_FUNCTIONS", None) is not None
 
```
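
The version branch could be dropped because the new attention interface, and its module-level ALL_ATTENTION_FUNCTIONS registry, exists in every release at or above the new 4.53 floor, so the attribute probe alone is enough. A standalone sketch of the surviving check (the function name is illustrative):

```python
import inspect

def uses_new_attention_interface(attn) -> bool:
    # Modules written against the new attention interface expose the
    # ALL_ATTENTION_FUNCTIONS registry at module level; older hard-coded
    # attention implementations (e.g. T5's) do not.
    module = inspect.getmodule(attn)
    return getattr(module, "ALL_ATTENTION_FUNCTIONS", None) is not None
```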

modelopt/torch/quantization/plugins/transformers_trainer.py

Lines changed: 5 additions & 4 deletions

```diff
@@ -31,16 +31,17 @@
 from modelopt.torch.distill.plugins.huggingface import KDTrainer
 from modelopt.torch.opt.conversion import restore_from_modelopt_state
 from modelopt.torch.opt.plugins import ModelOptHFTrainer
-from modelopt.torch.quantization.config import QuantizeConfig
-from modelopt.torch.quantization.nn import TensorQuantizer
-from modelopt.torch.quantization.utils import (
+from modelopt.torch.utils import print_rank_0
+
+from ..config import QuantizeConfig
+from ..nn import TensorQuantizer
+from ..utils import (
     calibrate_with_adapters,
     disable_lora_quantizers_in_config,
     get_quantizer_state_dict,
     is_quantized,
     set_quantizer_state_dict,
 )
-from modelopt.torch.utils import print_rank_0
 
 # TODO: Enable documentation rendering for this class
 
```
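
The trainer change is purely an import reorganization: the module lives in modelopt/torch/quantization/plugins/, so the two-dot relative imports resolve to the same modules as the old absolute paths. A quick check of that equivalence (assumes nvidia-modelopt is installed):

```python
import importlib

# The relative name "..config" is resolved against the importing module's
# package, so both spellings load the same module object.
pkg = "modelopt.torch.quantization.plugins"  # home of transformers_trainer.py
absolute = importlib.import_module("modelopt.torch.quantization.config")
relative = importlib.import_module("..config", package=pkg)
assert absolute is relative
```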

setup.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -61,7 +61,7 @@
         "diffusers>=0.32.2",
         "huggingface_hub>=0.24.0",
         "peft>=0.17.0",
-        "transformers>=4.48,<5.0",  # Should match modelopt/torch/__init__.py and tox.ini
+        "transformers>=4.53,<5.0",  # Should match modelopt/torch/__init__.py and tox.ini
         "deepspeed>=0.9.6 ; platform_system != 'Darwin' and platform_system != 'Windows'",
     ],
     # linter tools
```
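
The new pin can be inspected directly with packaging's SpecifierSet (the same machinery pip uses to evaluate requirement strings); a quick sketch of which versions it admits:

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet(">=4.53,<5.0")  # the new transformers requirement
print("4.52.4" in spec)  # False - below the new floor
print("4.53.0" in spec)  # True  - the new minimum
print("4.57.0" in spec)  # True  - any 4.x release above the floor
print("5.0.0" in spec)   # False - v5 stays excluded until it is tested
```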

tests/_test_utils/torch/transformers_models.py

Lines changed: 2 additions & 5 deletions

```diff
@@ -28,14 +28,13 @@
     BertForQuestionAnswering,
     LlamaConfig,
     LlamaForCausalLM,
+    Qwen3Config,
+    Qwen3ForCausalLM,
     T5Config,
     T5ForConditionalGeneration,
     T5Tokenizer,
 )
 
-if Version(transformers.__version__) >= Version("4.51"):
-    from transformers import Qwen3Config, Qwen3ForCausalLM
-
 if Version(transformers.__version__) >= Version("4.55"):
     from transformers import GptOssConfig, GptOssForCausalLM
 
@@ -46,8 +45,6 @@
 
 def get_tiny_qwen3(**config_kwargs) -> "Qwen3ForCausalLM":
     set_seed(SEED)
-    if Version(transformers.__version__) < Version("4.51"):
-        pytest.skip("Qwen3ForCausalLM is not supported in transformers < 4.51")
 
     kwargs = {
         "hidden_size": 32,
```

tests/gpu/torch/quantization/plugins/test_transformers_tp.py

Lines changed: 0 additions & 4 deletions

```diff
@@ -19,12 +19,10 @@
 import pytest
 import torch
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
-from packaging.version import Version
 
 import modelopt.torch.quantization as mtq
 
 pytest.importorskip("transformers")
-import transformers
 from _test_utils.torch.transformers_models import create_tiny_llama_dir
 from transformers import AutoModelForCausalLM
 
@@ -44,7 +42,5 @@ def _test_transformers_tp(model_path, rank, size):
 
 
 def test_transformers_tp(need_2_gpus, tmp_path):
-    if Version(transformers.__version__) < Version("4.52.0"):
-        pytest.skip("This test requires transformers>=4.52.0")
     model_path = create_tiny_llama_dir(tmp_path)
     spawn_multiprocess_job(size=2, job=partial(_test_transformers_tp, model_path), backend="nccl")
```

tox.ini

Lines changed: 1 addition & 1 deletion

```diff
@@ -26,7 +26,7 @@ deps =
     -e .[all,dev-test]
 
     # Should match setup.py
-    tf_min: transformers~=4.48.0
+    tf_min: transformers~=4.53.0
 commands =
     python -m pytest tests/unit {env:COV_ARGS:}
 
```
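
In the tox config, the `tf_min:` prefix applies the dependency only when the tf_min factor is selected, and `~=4.53.0` is PEP 440's compatible-release operator, so the CI floor job tracks the 4.53 patch series rather than drifting to newer minors. A sketch of the operator's semantics:

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=4.53.0")  # compatible release: >=4.53.0, ==4.53.*
print("4.53.2" in spec)  # True  - patch releases of the floor stay in scope
print("4.54.0" in spec)  # False - the tf_min job tests exactly the minimum series
```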
