2 changes: 1 addition & 1 deletion .github/workflows/gpu_tests.yml
@@ -21,7 +21,7 @@ jobs:
     runs-on: linux-amd64-gpu-h100-latest-1
     timeout-minutes: 60
     container:
-      image: nvcr.io/nvidia/pytorch:25.04-py3
+      image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
         LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
49 changes: 49 additions & 0 deletions .github/workflows/multi_version_unit_tests.yml
@@ -0,0 +1,49 @@
# Run unit tests with older supported Python and Torch versions
name: Multi version tests

on:
  pull_request:
    branches: [main, release/*]
    paths:
      - ".github/workflows/multi_version_unit_tests.yml"
      - "modelopt/**"
      - "tests/**"
      - "setup.py"
      - "tox.ini"

# Cancel previous runs if new commit is pushed to the same PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  multi-py-unit:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      matrix:
        py: [10, 11]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.${{ matrix.py }}"
      - name: Install dependencies
        run: pip install tox
      - name: Run unit tests
        run: tox -e py3${{ matrix.py }}-torch28-unit
  multi-torch-unit:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      matrix:
        torch: [25, 26, 27]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: pip install tox
      - name: Run unit tests
        run: tox -e py312-torch${{ matrix.torch }}-unit
33 changes: 33 additions & 0 deletions .github/workflows/partial_unit_tests.yml
@@ -0,0 +1,33 @@
name: Partial install unit tests

on:
  pull_request:
    branches: [main, release/*]
    paths:
      - ".github/workflows/partial_unit_tests.yml"
      - "modelopt/**"
      - "tests/**"
      - "setup.py"
      - "tox.ini"

# Cancel previous runs if new commit is pushed to the same PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  partial-unit:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      matrix:
        test-env: [onnx, torch]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: pip install tox
      - name: Run unit tests
        run: tox -e py312-partial-unit-${{ matrix.test-env }}
7 changes: 5 additions & 2 deletions .github/workflows/unit_tests.yml
@@ -17,8 +17,11 @@ concurrency:

 jobs:
   unit-tests:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     timeout-minutes: 30
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -27,4 +30,4 @@ jobs:
       - name: Install dependencies
         run: pip install tox
       - name: Run unit tests
-        run: tox -e py312-torch27-unit
+        run: tox -e py312-torch28-unit
2 changes: 2 additions & 0 deletions CHANGELOG.rst
@@ -8,6 +8,8 @@ Model Optimizer Changelog (Linux)

 **Deprecations**

+- Deprecate ``torch<2.5`` support.
+
 **New Features**

 - (Experimental) Add quantization support for custom TensorRT op in ONNX models.
4 changes: 2 additions & 2 deletions modelopt/torch/__init__.py
@@ -22,9 +22,9 @@

 from . import distill, nas, opt, prune, quantization, sparsity, speculative, utils

-if _Version(_torch_version) < _Version("2.5"):
+if _Version(_torch_version) < _Version("2.6"):
     _warnings.warn(
-        "nvidia-modelopt will drop torch<2.5 support in a future release.", DeprecationWarning
+        "nvidia-modelopt will drop torch<2.6 support in a future release.", DeprecationWarning
     )

 # Since `hf` dependencies are optional and users have pre-installed transformers, we need to ensure
5 changes: 4 additions & 1 deletion modelopt/torch/_deploy/utils/torch_onnx.py
@@ -411,15 +411,18 @@ def get_onnx_bytes_and_metadata(
     )
     with torch.inference_mode(), autocast, quantizer_context:
         if not dynamo_export or Version(torch.__version__) >= Version("2.6"):
+            additional_kwargs = {}
+            if not dynamo_export and Version(torch.__version__) >= Version("2.8"):
+                additional_kwargs["dynamic_axes"] = dynamic_axes
             torch.onnx.export(
                 model,
                 dummy_input,
                 onnx_save_path,
                 input_names=input_names,
                 output_names=output_names,
                 opset_version=onnx_opset,
-                dynamic_axes=dynamic_axes,
                 dynamo=dynamo_export,
+                **additional_kwargs,
             )
         else:  # torch < 2.6 with dynamo export
             export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
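The gist of this hunk: `dynamic_axes` is no longer passed unconditionally; the kwargs are assembled up front so there is a single `torch.onnx.export` call site, and `dynamic_axes` is forwarded only for the TorchScript (`dynamo=False`) exporter on torch >= 2.8. A standalone sketch of the same version-gated kwargs pattern (the `Toy` module and axes mapping are illustrative, and it assumes a torch build whose `torch.onnx.export` accepts the `dynamo` flag, i.e. 2.6+):

```python
# Illustrative sketch of the version-gated kwargs pattern above.
import torch
from packaging.version import Version


class Toy(torch.nn.Module):
    def forward(self, x):
        return x * 2


extra = {}
if Version(torch.__version__) >= Version("2.8"):
    # Mark dim 0 of input "x" as dynamic, mirroring the gating above.
    extra["dynamic_axes"] = {"x": {0: "batch"}}

torch.onnx.export(
    Toy(),
    (torch.randn(2, 3),),
    "toy.onnx",
    input_names=["x"],
    output_names=["y"],
    dynamo=False,
    **extra,
)
```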
4 changes: 2 additions & 2 deletions modelopt/torch/nas/modules/utils.py
@@ -40,9 +40,9 @@ def get_sliced_tensor_by_slices(
     tensor_sliced = tensor
     for i, _ in enumerate(slices):
         if sum(not isinstance(s, slice) for s in slices) < 2:
-            tensor_sliced = tensor_sliced[slices]
+            tensor_sliced = tensor_sliced[tuple(slices)]
             break
-        tensor_sliced = tensor_sliced[slices[: i + 1]]
+        tensor_sliced = tensor_sliced[tuple(slices[: i + 1])]
         slices[i] = slice(None)  # replace with a vanilla slice ("[:]") for next slicing iteration

     # return sliced, contiguous tensor
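The `tuple(...)` wrapping is the substance of this fix: indexing a tensor with a Python *list* of slices is interpreted as advanced (fancy) indexing, which NumPy deprecated and newer PyTorch rejects, whereas a *tuple* of slices is ordinary multi-dimensional basic indexing. A small sketch (not from the PR) showing the difference:

```python
# Minimal illustration of why the fix wraps `slices` in tuple().
import torch

t = torch.arange(24).reshape(2, 3, 4)
slices = [slice(0, 1), slice(None), slice(1, 3)]

# Tuple of slices -> basic indexing, one slice per dimension.
print(t[tuple(slices)].shape)  # torch.Size([1, 3, 2])

# t[slices] would treat the list as an index array (advanced indexing)
# and warns or raises on recent PyTorch releases.
```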
4 changes: 2 additions & 2 deletions setup.py
@@ -43,7 +43,7 @@
     "pulp",
     "regex",
     "safetensors",
-    "torch>=2.4",
+    "torch>=2.5",
     "torchprofile>=0.0.4",
     "torchvision",
 ]
@@ -58,7 +58,7 @@
     "onnxconverter-common",
     "onnxruntime~=1.22.0 ; platform_machine == 'aarch64' or platform_system == 'Darwin'",
     "onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
-    "onnxruntime-gpu==1.20.0; platform_system == 'Windows'",
+    "onnxruntime-directml==1.20.0; platform_system == 'Windows'",
     "onnxscript",  # For test_onnx_dynamo_export unit test
     "onnxsim ; python_version < '3.12' and platform_machine != 'aarch64'",
     "polygraphy>=0.49.22",
8 changes: 8 additions & 0 deletions tests/conftest.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import platform
+
 import pytest

@@ -36,3 +38,9 @@ def pytest_collection_modifyitems(config, items):
     for item in items:
         if "manual" in item.keywords:
             item.add_marker(skipper)
+
+
+@pytest.fixture
+def skip_on_windows():
+    if platform.system() == "Windows":
+        pytest.skip("Skipping on Windows")
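Tests opt in simply by declaring the fixture name as a parameter; because `pytest.skip()` runs inside the fixture body, the skip happens during test setup on Windows runners, with no decorator needed. A hypothetical example of the pattern (the same one the test changes below adopt):

```python
# Hypothetical test using the new skip_on_windows fixture from tests/conftest.py.
def test_posix_only_behavior(skip_on_windows):
    # Only reached on non-Windows platforms; Windows runners skip during setup.
    assert True
```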
2 changes: 1 addition & 1 deletion tests/unit/torch/deploy/utils/test_torch_onnx_utils.py
@@ -51,7 +51,7 @@
 @pytest.mark.parametrize(
     "model", deploy_benchmark_dynamo.values(), ids=deploy_benchmark_dynamo.keys()
 )
-def test_onnx_dynamo_export(model: BaseDeployModel):
+def test_onnx_dynamo_export(skip_on_windows, model: BaseDeployModel):
     # try it for all potential numeric types
     for active in range(model.get.num_choices):
         # retrieve args
6 changes: 0 additions & 6 deletions tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py
@@ -16,22 +16,16 @@
 import os

 import pytest
-import torch
 from _test_utils.opt_utils import apply_mode_with_sampling

 pytest.importorskip("transformers")
-import transformers
 from _test_utils.torch_model.transformers_models import (
     create_tiny_bert_dir,
     tf_modelopt_state_and_output_tester,
 )
 from transformers import AutoModelForQuestionAnswering, BertForQuestionAnswering


-@pytest.mark.skipif(
-    transformers.__version__ >= "4.52" and torch.__version__ <= "2.4",
-    reason="Skip when transformers > 4.52 and torch <= 2.4",
-)
 def test_pruned_transformers_save_restore(tmp_path):
     tiny_bert_dir = create_tiny_bert_dir(tmp_path)
     model_ref = BertForQuestionAnswering.from_pretrained(tiny_bert_dir)
2 changes: 1 addition & 1 deletion tests/unit/torch/quantization/test_autoquant.py
@@ -289,7 +289,7 @@ def _test_data_parallel_auto_quantize(rank, size):
     assert search_history["best"]["is_satisfied"]


-def test_data_parallel_auto_quantize():
+def test_data_parallel_auto_quantize(skip_on_windows):
     spawn_multiprocess_job(4, _test_data_parallel_auto_quantize, backend="gloo")
2 changes: 1 addition & 1 deletion tests/unit/torch/quantization/test_dist.py
@@ -43,5 +43,5 @@ def forward_loop(model):
     dist.destroy_process_group()


-def test_data_parallel():
+def test_data_parallel(skip_on_windows):
     spawn_multiprocess_job(2, _test_data_parallel_helper, backend="gloo")
30 changes: 19 additions & 11 deletions tests/unit/torch/quantization/test_quant_rnn.py
@@ -54,14 +54,18 @@ def test_no_quant(self, original_cls, bidirectional, bias):
         rnn_object_original.eval()
         set_quantizer_attribute(quant_rnn_object, lambda name: True, {"enable": False})

-        assert torch.allclose(quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0)
-        assert torch.allclose(quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0)
+        assert torch.allclose(
+            quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0, atol=1e-6
+        )
+        assert torch.allclose(
+            quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0, atol=1e-6
+        )

         test_input = torch.randn(INPUT_SHAPE)

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional", "bias"),
@@ -84,8 +88,12 @@ def test_no_quant_packed_sequence(self, original_cls, bidirectional, bias):
         rnn_object_original.eval()
         set_quantizer_attribute(quant_rnn_object, lambda name: True, {"enable": False})

-        assert torch.allclose(quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0)
-        assert torch.allclose(quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0)
+        assert torch.allclose(
+            quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0, atol=1e-6
+        )
+        assert torch.allclose(
+            quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0, atol=1e-6
+        )

         test_input = [
             torch.randn([INPUT_SHAPE[0] - 1, INPUT_SHAPE[2]]),
@@ -95,7 +103,7 @@

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1[0], out2[0])
+        assert torch.allclose(out1[0], out2[0], atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional", "bias"),
@@ -122,7 +130,7 @@ def test_no_quant_proj(self, original_cls, bidirectional, bias):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -148,7 +156,7 @@ def test_no_quant_batch_first(self, original_cls, bidirectional):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -180,7 +188,7 @@ def test_fake_quant_per_tensor(self, original_cls, bidirectional):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -211,7 +219,7 @@ def test_fake_quant_per_channel(self, original_cls, bidirectional):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2, atol=1e-5)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -258,4 +266,4 @@ def test_input_quant_per_tensor(self, original_cls, bidirectional):
             bidirectional,
             False,
         )[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
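The recurring edit in this file is the same everywhere: each `torch.allclose(a, b)` comparison gains an explicit `atol=1e-6` (and the one looser `atol=1e-5` case is unified to the same value), presumably to tolerate platform-dependent float32 noise now that the unit tests also run on Windows. `torch.allclose` passes iff `|a - b| <= atol + rtol * |b|`, with defaults `rtol=1e-5` and `atol=1e-8`. A small sketch isolating the effect (`rtol` pinned to 0 so only `atol` matters):

```python
# torch.allclose(a, b) passes iff |a - b| <= atol + rtol * |b|.
import torch

a = torch.tensor([1.0])
b = a + 5e-7  # typical float32-level discrepancy

assert not torch.allclose(a, b, rtol=0, atol=1e-8)  # default atol: fails
assert torch.allclose(a, b, rtol=0, atol=1e-6)      # the PR's tolerance: passes
```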
10 changes: 5 additions & 5 deletions tox.ini
@@ -1,24 +1,24 @@
 [tox]
 envlist=
     pre-commit-all
-    py312-torch27-{unit,gpu}
+    py312-torch28-{unit,gpu}
 skipsdist = True
 toxworkdir = /tmp/{env:USER}-modelopt-tox


 ############################
 # CPU Unit test environments
 ############################
-[testenv:{py39,py310,py311,py312}-torch{24,25,26,27}-unit]
+[testenv:{py310,py311,py312}-torch{25,26,27,28}-unit]
 deps =
     # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
     py312: onnxsim

     # torch version auto-selected based on torchvision version
-    torch24: torchvision~=0.19.0
     torch25: torchvision~=0.20.0
     torch26: torchvision~=0.21.0
     torch27: torchvision~=0.22.0
+    torch28: torchvision~=0.23.0

     -e .[all,dev-test]
 commands =
@@ -28,7 +28,7 @@ commands =
 #####################################################################
 # Environment to run unit tests with subset of dependencies installed
 #####################################################################
-[testenv:{py39,py310,py311,py312}-ext-unit-{onnx,torch,torch_deploy}]
+[testenv:{py310,py311,py312}-partial-unit-{onnx,torch,torch_deploy}]
 allowlist_externals =
     bash, rm
 deps =
@@ -53,7 +53,7 @@ commands =
 ########################################################
 # GPU test environments (Can be used with --current-env)
 ########################################################
-[testenv:{py39,py310,py311,py312}-cuda12-gpu]
+[testenv:{py310,py311,py312}-cuda12-gpu]
 commands_pre =
     # Install deps here so that it gets installed even in --current-env
     pip install -U megatron-core