2 changes: 1 addition & 1 deletion .github/workflows/gpu_tests.yml
@@ -21,7 +21,7 @@ jobs:
     runs-on: linux-amd64-gpu-h100-latest-1
     timeout-minutes: 60
     container:
-      image: nvcr.io/nvidia/pytorch:25.04-py3
+      image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
         LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
49 changes: 49 additions & 0 deletions .github/workflows/multi_version_unit_tests.yml
@@ -0,0 +1,49 @@
# Run unit tests with older supported Python and Torch versions
name: Multi version tests

on:
  pull_request:
    branches: [main, release/*]
    paths:
      - ".github/workflows/multi_version_unit_tests.yml"
      - "modelopt/**"
      - "tests/**"
      - "setup.py"
      - "tox.ini"

# Cancel previous runs if new commit is pushed to the same PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  multi-py-unit:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      matrix:
        py: [10, 11]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.${{ matrix.py }}"
      - name: Install dependencies
        run: pip install tox
      - name: Run unit tests
        run: tox -e py3${{ matrix.py }}-torch28-unit
  multi-torch-unit:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      matrix:
        torch: [25, 26, 27]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: pip install tox
      - name: Run unit tests
        run: tox -e py312-torch${{ matrix.torch }}-unit
33 changes: 33 additions & 0 deletions .github/workflows/partial_unit_tests.yml
@@ -0,0 +1,33 @@
name: Partial install unit tests

on:
  pull_request:
    branches: [main, release/*]
    paths:
      - ".github/workflows/partial_unit_tests.yml"
      - "modelopt/**"
      - "tests/**"
      - "setup.py"
      - "tox.ini"

# Cancel previous runs if new commit is pushed to the same PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  partial-unit:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      matrix:
        test-env: [onnx, torch]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: pip install tox
      - name: Run unit tests
        run: tox -e py312-partial-unit-${{ matrix.test-env }}
7 changes: 5 additions & 2 deletions .github/workflows/unit_tests.yml
@@ -17,8 +17,11 @@ concurrency:

 jobs:
   unit-tests:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
     timeout-minutes: 30
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
@@ -27,4 +30,4 @@ jobs:
       - name: Install dependencies
         run: pip install tox
       - name: Run unit tests
-        run: tox -e py312-torch27-unit
+        run: tox -e py312-torch28-unit
2 changes: 2 additions & 0 deletions CHANGELOG.rst
@@ -8,6 +8,8 @@ Model Optimizer Changelog (Linux)

 **Deprecations**

+- Deprecate ``torch<2.5`` support.
+
 **New Features**

 - (Experimental) Add quantization support for custom TensorRT op in ONNX models.
4 changes: 2 additions & 2 deletions modelopt/torch/__init__.py
@@ -22,9 +22,9 @@

 from . import distill, nas, opt, prune, quantization, sparsity, speculative, utils

-if _Version(_torch_version) < _Version("2.5"):
+if _Version(_torch_version) < _Version("2.6"):
     _warnings.warn(
-        "nvidia-modelopt will drop torch<2.5 support in a future release.", DeprecationWarning
+        "nvidia-modelopt will drop torch<2.6 support in a future release.", DeprecationWarning
     )

 # Since `hf` dependencies are optional and users have pre-installed transformers, we need to ensure
5 changes: 4 additions & 1 deletion modelopt/torch/_deploy/utils/torch_onnx.py
@@ -411,15 +411,18 @@ def get_onnx_bytes_and_metadata(
     )
     with torch.inference_mode(), autocast, quantizer_context:
         if not dynamo_export or Version(torch.__version__) >= Version("2.6"):
+            additional_kwargs = {}
+            if not dynamo_export and Version(torch.__version__) >= Version("2.8"):
+                additional_kwargs["dynamic_axes"] = dynamic_axes
             torch.onnx.export(
                 model,
                 dummy_input,
                 onnx_save_path,
                 input_names=input_names,
                 output_names=output_names,
                 opset_version=onnx_opset,
-                dynamic_axes=dynamic_axes,
                 dynamo=dynamo_export,
+                **additional_kwargs,
             )
         else:  # torch < 2.6 with dynamo export
             export_options = torch.onnx.ExportOptions(dynamic_shapes=True)
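The gist of this hunk: `dynamic_axes` is no longer passed unconditionally; the kwargs are assembled up front so there is a single `torch.onnx.export` call site, and `dynamic_axes` is forwarded only for the TorchScript (`dynamo=False`) exporter on torch >= 2.8. A standalone sketch of the same version-gated kwargs pattern (the `Toy` module and axes mapping are illustrative, and it assumes a torch build whose `torch.onnx.export` accepts the `dynamo` flag, i.e. 2.6+):

```python
# Illustrative sketch of the version-gated kwargs pattern above.
import torch
from packaging.version import Version


class Toy(torch.nn.Module):
    def forward(self, x):
        return x * 2


extra = {}
if Version(torch.__version__) >= Version("2.8"):
    # Mark dim 0 of input "x" as dynamic, mirroring the gating above.
    extra["dynamic_axes"] = {"x": {0: "batch"}}

torch.onnx.export(
    Toy(),
    (torch.randn(2, 3),),
    "toy.onnx",
    input_names=["x"],
    output_names=["y"],
    dynamo=False,
    **extra,
)
```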
4 changes: 2 additions & 2 deletions modelopt/torch/nas/modules/utils.py
@@ -40,9 +40,9 @@ def get_sliced_tensor_by_slices(
     tensor_sliced = tensor
     for i, _ in enumerate(slices):
         if sum(not isinstance(s, slice) for s in slices) < 2:
-            tensor_sliced = tensor_sliced[slices]
+            tensor_sliced = tensor_sliced[tuple(slices)]
             break
-        tensor_sliced = tensor_sliced[slices[: i + 1]]
+        tensor_sliced = tensor_sliced[tuple(slices[: i + 1])]
         slices[i] = slice(None)  # replace with a vanilla slice ("[:]") for next slicing iteration

     # return sliced, contiguous tensor
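The `tuple(...)` wrapping is the substance of this fix: indexing a tensor with a Python *list* of slices is interpreted as advanced (fancy) indexing, which NumPy deprecated and newer PyTorch rejects, whereas a *tuple* of slices is ordinary multi-dimensional basic indexing. A small sketch (not from the PR) showing the difference:

```python
# Minimal illustration of why the fix wraps `slices` in tuple().
import torch

t = torch.arange(24).reshape(2, 3, 4)
slices = [slice(0, 1), slice(None), slice(1, 3)]

# Tuple of slices -> basic indexing, one slice per dimension.
print(t[tuple(slices)].shape)  # torch.Size([1, 3, 2])

# t[slices] would treat the list as an index array (advanced indexing)
# and warns or raises on recent PyTorch releases.
```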
4 changes: 2 additions & 2 deletions setup.py
@@ -43,7 +43,7 @@
     "pulp",
     "regex",
     "safetensors",
-    "torch>=2.4",
+    "torch>=2.5",
     "torchprofile>=0.0.4",
     "torchvision",
 ]
@@ -58,7 +58,7 @@
     "onnxconverter-common",
     "onnxruntime~=1.22.0 ; platform_machine == 'aarch64' or platform_system == 'Darwin'",
     "onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
-    "onnxruntime-gpu==1.20.0; platform_system == 'Windows'",
+    "onnxruntime-directml==1.20.0; platform_system == 'Windows'",
     "onnxscript",  # For test_onnx_dynamo_export unit test
     "onnxsim ; python_version < '3.12' and platform_machine != 'aarch64'",
     "polygraphy>=0.49.22",
8 changes: 8 additions & 0 deletions tests/conftest.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import platform
+
 import pytest

@@ -36,3 +38,9 @@ def pytest_collection_modifyitems(config, items):
     for item in items:
         if "manual" in item.keywords:
             item.add_marker(skipper)
+
+
+@pytest.fixture
+def skip_on_windows():
+    if platform.system() == "Windows":
+        pytest.skip("Skipping on Windows")
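Tests opt in simply by declaring the fixture name as a parameter; because `pytest.skip()` runs inside the fixture body, the skip happens during test setup on Windows runners, with no decorator needed. A hypothetical example of the pattern (the same one the test changes below adopt):

```python
# Hypothetical test using the new skip_on_windows fixture from tests/conftest.py.
def test_posix_only_behavior(skip_on_windows):
    # Only reached on non-Windows platforms; Windows runners skip during setup.
    assert True
```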
2 changes: 1 addition & 1 deletion tests/unit/torch/deploy/utils/test_torch_onnx_utils.py
@@ -51,7 +51,7 @@
 @pytest.mark.parametrize(
     "model", deploy_benchmark_dynamo.values(), ids=deploy_benchmark_dynamo.keys()
 )
-def test_onnx_dynamo_export(model: BaseDeployModel):
+def test_onnx_dynamo_export(skip_on_windows, model: BaseDeployModel):
     # try it for all potential numeric types
     for active in range(model.get.num_choices):
         # retrieve args
6 changes: 0 additions & 6 deletions tests/unit/torch/nas/plugins/test_hf_nas_save_restore.py
@@ -16,22 +16,16 @@
 import os

 import pytest
-import torch
 from _test_utils.opt_utils import apply_mode_with_sampling

 pytest.importorskip("transformers")
-import transformers
 from _test_utils.torch_model.transformers_models import (
     create_tiny_bert_dir,
     tf_modelopt_state_and_output_tester,
 )
 from transformers import AutoModelForQuestionAnswering, BertForQuestionAnswering


-@pytest.mark.skipif(
-    transformers.__version__ >= "4.52" and torch.__version__ <= "2.4",
-    reason="Skip when transformers > 4.52 and torch <= 2.4",
-)
 def test_pruned_transformers_save_restore(tmp_path):
     tiny_bert_dir = create_tiny_bert_dir(tmp_path)
     model_ref = BertForQuestionAnswering.from_pretrained(tiny_bert_dir)
2 changes: 1 addition & 1 deletion tests/unit/torch/quantization/test_autoquant.py
@@ -289,7 +289,7 @@ def _test_data_parallel_auto_quantize(rank, size):
     assert search_history["best"]["is_satisfied"]


-def test_data_parallel_auto_quantize():
+def test_data_parallel_auto_quantize(skip_on_windows):
     spawn_multiprocess_job(4, _test_data_parallel_auto_quantize, backend="gloo")
2 changes: 1 addition & 1 deletion tests/unit/torch/quantization/test_dist.py
@@ -43,5 +43,5 @@ def forward_loop(model):
     dist.destroy_process_group()


-def test_data_parallel():
+def test_data_parallel(skip_on_windows):
     spawn_multiprocess_job(2, _test_data_parallel_helper, backend="gloo")
30 changes: 19 additions & 11 deletions tests/unit/torch/quantization/test_quant_rnn.py
@@ -54,14 +54,18 @@ def test_no_quant(self, original_cls, bidirectional, bias):
         rnn_object_original.eval()
         set_quantizer_attribute(quant_rnn_object, lambda name: True, {"enable": False})

-        assert torch.allclose(quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0)
-        assert torch.allclose(quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0)
+        assert torch.allclose(
+            quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0, atol=1e-6
+        )
+        assert torch.allclose(
+            quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0, atol=1e-6
+        )

         test_input = torch.randn(INPUT_SHAPE)

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional", "bias"),
@@ -84,8 +88,12 @@ def test_no_quant_packed_sequence(self, original_cls, bidirectional, bias):
         rnn_object_original.eval()
         set_quantizer_attribute(quant_rnn_object, lambda name: True, {"enable": False})

-        assert torch.allclose(quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0)
-        assert torch.allclose(quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0)
+        assert torch.allclose(
+            quant_rnn_object.weight_ih_l0, rnn_object_original.weight_ih_l0, atol=1e-6
+        )
+        assert torch.allclose(
+            quant_rnn_object.weight_hh_l0, rnn_object_original.weight_hh_l0, atol=1e-6
+        )

         test_input = [
             torch.randn([INPUT_SHAPE[0] - 1, INPUT_SHAPE[2]]),
@@ -95,7 +103,7 @@

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1[0], out2[0])
+        assert torch.allclose(out1[0], out2[0], atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional", "bias"),
@@ -122,7 +130,7 @@ def test_no_quant_proj(self, original_cls, bidirectional, bias):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -148,7 +156,7 @@ def test_no_quant_batch_first(self, original_cls, bidirectional):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -180,7 +188,7 @@ def test_fake_quant_per_tensor(self, original_cls, bidirectional):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -211,7 +219,7 @@ def test_fake_quant_per_channel(self, original_cls, bidirectional):

         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2, atol=1e-5)
+        assert torch.allclose(out1, out2, atol=1e-6)

     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
@@ -258,4 +266,4 @@ def test_input_quant_per_tensor(self, original_cls, bidirectional):
             bidirectional,
             False,
         )[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-6)
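The recurring edit in this file is the same everywhere: each `torch.allclose(a, b)` comparison gains an explicit `atol=1e-6` (and the one looser `atol=1e-5` case is unified to the same value), presumably to tolerate platform-dependent float32 noise now that the unit tests also run on Windows. `torch.allclose` passes iff `|a - b| <= atol + rtol * |b|`, with defaults `rtol=1e-5` and `atol=1e-8`. A small sketch isolating the effect (`rtol` pinned to 0 so only `atol` matters):

```python
# torch.allclose(a, b) passes iff |a - b| <= atol + rtol * |b|.
import torch

a = torch.tensor([1.0])
b = a + 5e-7  # typical float32-level discrepancy

assert not torch.allclose(a, b, rtol=0, atol=1e-8)  # default atol: fails
assert torch.allclose(a, b, rtol=0, atol=1e-6)      # the PR's tolerance: passes
```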
10 changes: 5 additions & 5 deletions tox.ini
@@ -1,24 +1,24 @@
 [tox]
 envlist=
     pre-commit-all
-    py312-torch27-{unit,gpu}
+    py312-torch28-{unit,gpu}
 skipsdist = True
 toxworkdir = /tmp/{env:USER}-modelopt-tox


 ############################
 # CPU Unit test environments
 ############################
-[testenv:{py39,py310,py311,py312}-torch{24,25,26,27}-unit]
+[testenv:{py310,py311,py312}-torch{25,26,27,28}-unit]
 deps =
     # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
     py312: onnxsim

     # torch version auto-selected based on torchvision version
-    torch24: torchvision~=0.19.0
     torch25: torchvision~=0.20.0
     torch26: torchvision~=0.21.0
     torch27: torchvision~=0.22.0
+    torch28: torchvision~=0.23.0

     -e .[all,dev-test]
 commands =
@@ -28,7 +28,7 @@ commands =
 #####################################################################
 # Environment to run unit tests with subset of dependencies installed
 #####################################################################
-[testenv:{py39,py310,py311,py312}-ext-unit-{onnx,torch,torch_deploy}]
+[testenv:{py310,py311,py312}-partial-unit-{onnx,torch,torch_deploy}]
 allowlist_externals =
     bash, rm
 deps =
@@ -53,7 +53,7 @@ commands =
 ########################################################
 # GPU test environments (Can be used with --current-env)
 ########################################################
-[testenv:{py39,py310,py311,py312}-cuda12-gpu]
+[testenv:{py310,py311,py312}-cuda12-gpu]
 commands_pre =
     # Install deps here so that it gets installed even in --current-env
     pip install -U megatron-core