From ee32fa8856d343df0c501e0a5f4165931578aacb Mon Sep 17 00:00:00 2001
From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
Date: Thu, 7 Aug 2025 23:59:50 +0530
Subject: [PATCH] Add CI tests for partial installs and multi-versions + torch
 2.8 fix

Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
---
 .github/workflows/gpu_tests.yml               |  2 +-
 .../workflows/multi_version_unit_tests.yml    | 49 +++++++++++++++++++
 .github/workflows/partial_unit_tests.yml      | 33 +++++++++++++
 .github/workflows/unit_tests.yml              |  7 ++-
 CHANGELOG.rst                                 |  2 +
 modelopt/torch/__init__.py                    |  4 +-
 modelopt/torch/_deploy/utils/torch_onnx.py    |  5 +-
 modelopt/torch/nas/modules/utils.py           |  4 +-
 setup.py                                      |  4 +-
 tests/conftest.py                             |  8 +++
 .../deploy/utils/test_torch_onnx_utils.py     |  2 +-
 .../nas/plugins/test_hf_nas_save_restore.py   |  6 ---
 .../unit/torch/quantization/test_autoquant.py |  2 +-
 tests/unit/torch/quantization/test_dist.py    |  2 +-
 .../unit/torch/quantization/test_quant_rnn.py | 30 +++++++-----
 tox.ini                                       | 10 ++--
 16 files changed, 135 insertions(+), 35 deletions(-)
 create mode 100644 .github/workflows/multi_version_unit_tests.yml
 create mode 100644 .github/workflows/partial_unit_tests.yml

diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
index 327e6633..f91c29be 100644
--- a/.github/workflows/gpu_tests.yml
+++ b/.github/workflows/gpu_tests.yml
@@ -21,7 +21,7 @@ jobs:
     runs-on: linux-amd64-gpu-h100-latest-1
    timeout-minutes: 60
    container:
-      image: nvcr.io/nvidia/pytorch:25.04-py3
+      image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
         LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
diff --git a/.github/workflows/multi_version_unit_tests.yml b/.github/workflows/multi_version_unit_tests.yml
new file mode 100644
index 00000000..2e43bfa2
--- /dev/null
+++ b/.github/workflows/multi_version_unit_tests.yml
@@ -0,0 +1,49 @@
+# Run unit tests with older supported Python and Torch versions
+name: Multi-version tests
+
+on:
+  pull_request:
+    branches: [main, release/*]
+    paths:
+      - ".github/workflows/multi_version_unit_tests.yml"
+      - "modelopt/**"
+      - "tests/**"
+      - "setup.py"
+      - "tox.ini"
+
+# Cancel previous runs if a new commit is pushed to the same PR
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  multi-py-unit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        py: [10, 11]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.${{ matrix.py }}"
+      - name: Install dependencies
+        run: pip install tox
+      - name: Run unit tests
+        run: tox -e py3${{ matrix.py }}-torch28-unit
+  multi-torch-unit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        torch: [25, 26, 27]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: pip install tox
+      - name: Run unit tests
+        run: tox -e py312-torch${{ matrix.torch }}-unit
diff --git a/.github/workflows/partial_unit_tests.yml b/.github/workflows/partial_unit_tests.yml
new file mode 100644
index 00000000..cc540c9c
--- /dev/null
+++ b/.github/workflows/partial_unit_tests.yml
@@ -0,0 +1,33 @@
+name: Partial install unit tests
+
+on:
+  pull_request:
+    branches: [main, release/*]
+    paths:
+      - ".github/workflows/partial_unit_tests.yml"
+      - "modelopt/**"
+      - "tests/**"
+      - "setup.py"
+      - "tox.ini"
+
+# Cancel previous runs if a new commit is pushed to the same PR
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  partial-unit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        test-env: [onnx, torch]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install dependencies
+        run: pip install tox
+      - name: Run unit tests
+        run: tox -e py312-partial-unit-${{ matrix.test-env }}
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index b279a276..52b45496 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -17,8 +17,11 @@ concurrency:
 
 jobs:
   unit-tests:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     timeout-minutes: 30
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -27,4 +30,4 @@ jobs:
         with:
           python-version: "3.12"
       - name: Install dependencies
         run: pip install tox
       - name: Run unit tests
-        run: tox -e py312-torch27-unit
+        run: tox -e py312-torch28-unit
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index f57ae87b..c1956a92 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,6 +8,8 @@ Model Optimizer Changelog (Linux)
 
 **Deprecations**
 
+- Drop ``torch<2.5`` support and deprecate ``torch<2.6`` support.
+
 **New Features**
 
 - (Experimental) Add quantization support for custom TensorRT op in ONNX models.
diff --git a/modelopt/torch/__init__.py b/modelopt/torch/__init__.py
index 965d08a8..ef0fc432 100644
--- a/modelopt/torch/__init__.py
+++ b/modelopt/torch/__init__.py
@@ -22,9 +22,9 @@
 
 from . import distill, nas, opt, prune, quantization, sparsity, speculative, utils
 
-if _Version(_torch_version) < _Version("2.5"):
+if _Version(_torch_version) < _Version("2.6"):
     _warnings.warn(
-        "nvidia-modelopt will drop torch<2.5 support in a future release.", DeprecationWarning
+        "nvidia-modelopt will drop torch<2.6 support in a future release.", DeprecationWarning
     )
 
 # Since `hf` dependencies are optional and users have pre-installed transformers, we need to ensure
diff --git a/modelopt/torch/_deploy/utils/torch_onnx.py b/modelopt/torch/_deploy/utils/torch_onnx.py
index c2fd3bae..5a126db9 100644
--- a/modelopt/torch/_deploy/utils/torch_onnx.py
+++ b/modelopt/torch/_deploy/utils/torch_onnx.py
@@ -411,6 +411,9 @@ def get_onnx_bytes_and_metadata(
     )
     with torch.inference_mode(), autocast, quantizer_context:
         if not dynamo_export or Version(torch.__version__) >= Version("2.6"):
+            additional_kwargs = {}
+            if not dynamo_export and Version(torch.__version__) >= Version("2.8"):
+                additional_kwargs["dynamic_axes"] = dynamic_axes
             torch.onnx.export(
                 model,
                 dummy_input,
@@ -418,8 +421,8 @@
                 input_names=input_names,
                 output_names=output_names,
                 opset_version=onnx_opset,
-                dynamic_axes=dynamic_axes,
                 dynamo=dynamo_export,
+                **additional_kwargs,
             )
         else:  # torch < 2.6 with dynamo export
             export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
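
Note on the torch_onnx.py change above: `dynamic_axes` is no longer passed to `torch.onnx.export` unconditionally; it is collected into `additional_kwargs` and forwarded only on the TorchScript path (`dynamo=False`) when running torch>=2.8. Below is a minimal, self-contained sketch of that version-gated keyword pattern; the toy model, input, and "toy.onnx" output path are hypothetical stand-ins for the real arguments inside get_onnx_bytes_and_metadata().

import torch
from packaging.version import Version

# Hypothetical stand-ins; only the version gate mirrors the patch.
model = torch.nn.Linear(4, 2)
dummy_input = torch.randn(1, 4)
dynamo_export = False  # TorchScript exporter path
dynamic_axes = {"input": {0: "batch_size"}}

# Collect version-sensitive keyword arguments instead of passing them
# unconditionally, so a single call site works across torch releases.
additional_kwargs = {}
if not dynamo_export and Version(torch.__version__) >= Version("2.8"):
    additional_kwargs["dynamic_axes"] = dynamic_axes

torch.onnx.export(
    model,
    (dummy_input,),
    "toy.onnx",
    input_names=["input"],
    output_names=["output"],
    dynamo=dynamo_export,
    **additional_kwargs,
)
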
diff --git a/modelopt/torch/nas/modules/utils.py b/modelopt/torch/nas/modules/utils.py
index 7dbec306..1a9afaa2 100644
--- a/modelopt/torch/nas/modules/utils.py
+++ b/modelopt/torch/nas/modules/utils.py
@@ -40,9 +40,9 @@ def get_sliced_tensor_by_slices(
     tensor_sliced = tensor
     for i, _ in enumerate(slices):
         if sum(not isinstance(s, slice) for s in slices) < 2:
-            tensor_sliced = tensor_sliced[slices]
+            tensor_sliced = tensor_sliced[tuple(slices)]
             break
-        tensor_sliced = tensor_sliced[slices[: i + 1]]
+        tensor_sliced = tensor_sliced[tuple(slices[: i + 1])]
         slices[i] = slice(None)  # replace with a vanilla slice ("[:]") for next slicing iteration
 
     # return sliced, contiguous tensor
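
Note on the get_sliced_tensor_by_slices() change above: indexing a tensor with a Python list of slices is interpreted as advanced indexing rather than multi-dimensional slicing; that form has long been deprecated and is rejected by recent torch releases, hence the tuple(...) conversion. A minimal illustration with a hypothetical toy tensor:

import torch

t = torch.arange(12).reshape(3, 4)
slices = [slice(0, 2), slice(1, 3)]

# A tuple of slices is ordinary multi-dimensional slicing, i.e. t[0:2, 1:3];
# the bare list form (t[slices]) is deprecated/rejected on newer torch.
assert torch.equal(t[tuple(slices)], t[0:2, 1:3])
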
diff --git a/setup.py b/setup.py
index a1c4dcf4..e81acadb 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@
     "pulp",
     "regex",
     "safetensors",
-    "torch>=2.4",
+    "torch>=2.5",
     "torchprofile>=0.0.4",
     "torchvision",
 ]
@@ -58,7 +58,7 @@
     "onnxconverter-common",
     "onnxruntime~=1.22.0 ; platform_machine == 'aarch64' or platform_system == 'Darwin'",
     "onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
-    "onnxruntime-gpu==1.20.0; platform_system == 'Windows'",
+    "onnxruntime-directml==1.20.0; platform_system == 'Windows'",
     "onnxscript",  # For test_onnx_dynamo_export unit test
     "onnxsim ; python_version < '3.12' and platform_machine != 'aarch64'",
     "polygraphy>=0.49.22",
diff --git a/tests/conftest.py b/tests/conftest.py
index 034447cc..f7ffc03f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import platform
+
 import pytest
 
 
@@ -36,3 +38,9 @@ def pytest_collection_modifyitems(config, items):
     for item in items:
         if "manual" in item.keywords:
             item.add_marker(skipper)
+
+
+@pytest.fixture
+def skip_on_windows():
+    if platform.system() == "Windows":
+        pytest.skip("Skipping on Windows")
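
Note on the new conftest.py fixture above: pytest injects fixtures by parameter name, so a test opts into the Windows skip simply by accepting skip_on_windows as an argument (as the test changes below do); the fixture body runs before the test and calls pytest.skip() on Windows. A minimal, hypothetical usage sketch:

# Hypothetical example; requesting the fixture by name makes pytest skip
# this test on Windows before the body executes.
def test_example(skip_on_windows):
    assert True
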
diff --git a/tests/unit/torch/deploy/utils/test_torch_onnx_utils.py b/tests/unit/torch/deploy/utils/test_torch_onnx_utils.py
index ece792c5..8540a33d 100644
--- a/tests/unit/torch/deploy/utils/test_torch_onnx_utils.py
+++ b/tests/unit/torch/deploy/utils/test_torch_onnx_utils.py
@@ -51,7 +51,7 @@
 @pytest.mark.parametrize(
     "model", deploy_benchmark_dynamo.values(), ids=deploy_benchmark_dynamo.keys()
 )
-def test_onnx_dynamo_export(model: BaseDeployModel):
+def test_onnx_dynamo_export(skip_on_windows, model: BaseDeployModel):
     # try it for all potential numeric types
     for active in range(model.get.num_choices):
         # retrieve args
diff --git a/tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py b/tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py
index b323d07f..d1270f36 100644
--- a/tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py
+++ b/tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py
@@ -16,11 +16,9 @@
 import os
 
 import pytest
-import torch
 from _test_utils.opt_utils import apply_mode_with_sampling
 
 pytest.importorskip("transformers")
-import transformers
 from _test_utils.torch_model.transformers_models import (
     create_tiny_bert_dir,
     tf_modelopt_state_and_output_tester,
@@ -28,10 +26,6 @@
 from transformers import AutoModelForQuestionAnswering, BertForQuestionAnswering
 
 
-@pytest.mark.skipif(
-    transformers.__version__ >= "4.52" and torch.__version__ <= "2.4",
-    reason="Skip when transformers > 4.52 and torch <= 2.4",
-)
 def test_pruned_transformers_save_restore(tmp_path):
     tiny_bert_dir = create_tiny_bert_dir(tmp_path)
     model_ref = BertForQuestionAnswering.from_pretrained(tiny_bert_dir)
diff --git a/tests/unit/torch/quantization/test_autoquant.py b/tests/unit/torch/quantization/test_autoquant.py
index 6c7ef17a..c0ca4e3f 100644
--- a/tests/unit/torch/quantization/test_autoquant.py
+++ b/tests/unit/torch/quantization/test_autoquant.py
@@ -289,7 +289,7 @@ def _test_data_parallel_auto_quantize(rank, size):
     assert search_history["best"]["is_satisfied"]
 
 
-def test_data_parallel_auto_quantize():
+def test_data_parallel_auto_quantize(skip_on_windows):
     spawn_multiprocess_job(4, _test_data_parallel_auto_quantize, backend="gloo")
 
 
diff --git a/tests/unit/torch/quantization/test_dist.py b/tests/unit/torch/quantization/test_dist.py
index 8dff8a56..efb02bd0 100644
--- a/tests/unit/torch/quantization/test_dist.py
+++ b/tests/unit/torch/quantization/test_dist.py
@@ -43,5 +43,5 @@ def forward_loop(model):
     dist.destroy_process_group()
 
 
-def test_data_parallel():
+def test_data_parallel(skip_on_windows):
     spawn_multiprocess_job(2, _test_data_parallel_helper, backend="gloo")
diff --git a/tests/unit/torch/quantization/test_quant_rnn.py b/tests/unit/torch/quantization/test_quant_rnn.py
index 284ba4c7..6f3d054c 100644
--- a/tests/unit/torch/quantization/test_quant_rnn.py
+++ b/tests/unit/torch/quantization/test_quant_rnn.py
@@ -54,14 +54,18 @@ def test_no_quant(self, original_cls, bidirectional, bias):
         rnn_object_original.eval()
 
         set_quantizer_attribute(quant_rnn_object, lambda name: True, {"enable": False})
-        assert torch.allclose(quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0)
-        assert torch.allclose(quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0)
+        assert torch.allclose(
+            quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0, atol=1e-6
+        )
+        assert torch.allclose(
+            quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0, atol=1e-6
+        )
 
         test_input = torch.randn(INPUT_SHAPE)
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional", "bias"),
@@ -84,8 +88,12 @@ def test_no_quant_packed_sequence(self, original_cls, bidirectional, bias):
         rnn_object_original.eval()
 
         set_quantizer_attribute(quant_rnn_object, lambda name: True, {"enable": False})
-        assert torch.allclose(quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0)
-        assert torch.allclose(quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0)
+        assert torch.allclose(
+            quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0, atol=1e-6
+        )
+        assert torch.allclose(
+            quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0, atol=1e-6
+        )
 
         test_input = [
             torch.randn([INPUT_SHAPE[0] - 1, INPUT_SHAPE[2]]),
@@ -95,7 +103,7 @@
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1[0], out2[0])
+        assert torch.allclose(out1[0], out2[0], atol=1e-6)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional", "bias"),
@@ -122,7 +130,7 @@
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -148,7 +156,7 @@
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -180,7 +188,7 @@
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -211,7 +219,7 @@
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2, atol=1e-5)
+        assert torch.allclose(out1, out2, atol=1e-6)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -258,4 +266,4 @@
             bidirectional,
             False,
         )[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
diff --git a/tox.ini b/tox.ini
index aa522393..dc3dcc9d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,7 @@
 [tox]
 envlist=
     pre-commit-all
-    py312-torch27-{unit,gpu}
+    py312-torch28-{unit,gpu}
 skipsdist = True
 toxworkdir = /tmp/{env:USER}-modelopt-tox
 
@@ -9,16 +9,16 @@ toxworkdir = /tmp/{env:USER}-modelopt-tox
 ############################
 # CPU Unit test environments
 ############################
-[testenv:{py39,py310,py311,py312}-torch{24,25,26,27}-unit]
+[testenv:{py310,py311,py312}-torch{25,26,27,28}-unit]
 deps =
     # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
     py312: onnxsim
 
     # torch version auto-selected based on torchvision version
-    torch24: torchvision~=0.19.0
     torch25: torchvision~=0.20.0
     torch26: torchvision~=0.21.0
     torch27: torchvision~=0.22.0
+    torch28: torchvision~=0.23.0
 
     -e .[all,dev-test]
 commands =
@@ -28,7 +28,7 @@
 #####################################################################
 # Environment to run unit tests with subset of dependencies installed
 #####################################################################
-[testenv:{py39,py310,py311,py312}-ext-unit-{onnx,torch,torch_deploy}]
+[testenv:{py310,py311,py312}-partial-unit-{onnx,torch,torch_deploy}]
 allowlist_externals =
     bash, rm
 deps =
@@ -53,7 +53,7 @@
 ########################################################
 # GPU test environments (Can be used with --current-env)
 ########################################################
-[testenv:{py39,py310,py311,py312}-cuda12-gpu]
+[testenv:{py310,py311,py312}-cuda12-gpu]
 commands_pre =
     # Install deps here so that it gets installed even in --current-env
     pip install -U megatron-core