Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/gpu_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: GPU tests

# Runs the GPU test suite on pushes to branches named `pull-request/<number>`,
# and only when the workflow itself, the library, the tests or the packaging /
# tox configuration change.
on:
  push:
    branches: ["pull-request/[0-9]+"]
    paths:
      - ".github/workflows/gpu_tests.yml"
      - "modelopt/**"
      - "tests/**"
      - "setup.py"
      - "tox.ini"

# Cancel previous runs if a new commit is pushed to the same PR branch.
# This workflow is triggered by `push`, not `pull_request`, so
# `github.event.pull_request.number` is empty here; using it would put every
# run in one shared group and let a push to one PR branch cancel the run of
# another. The pushed ref (`refs/heads/pull-request/<number>`) is unique per
# branch, so key the group on that instead.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  gpu-tests:
    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
    runs-on: linux-amd64-gpu-h100-latest-1
    timeout-minutes: 60
    container:
      image: nvcr.io/nvidia/pytorch:25.04-py3
      env:
        GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
        # NOTE(review): workflow `env` values are not shell-expanded, so
        # `${LD_LIBRARY_PATH}` is presumably passed to the container as a
        # literal string rather than the image's existing value - confirm.
        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
    steps:
      - uses: actions/checkout@v4
      - name: Install dependencies
        run: pip install tox-current-env
      # `--current-env` makes tox run inside the container's existing Python
      # environment instead of creating a fresh virtualenv.
      - name: Run gpu tests
        run: tox -e py312-cuda12-gpu --current-env
5 changes: 1 addition & 4 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: "3.12"
# Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
- name: Install dependencies
run: |
pip install onnxsim
pip install tox
run: pip install tox
- name: Run unit tests
run: tox -e py312-torch27-unit
2 changes: 1 addition & 1 deletion tests/unit/torch/quantization/test_quant_rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def test_fake_quant_per_channel(self, original_cls, bidirectional):

out1 = quant_rnn_object(test_input)[0]
out2 = rnn_object_original(test_input)[0]
assert torch.allclose(out1, out2)
assert torch.allclose(out1, out2, atol=1e-5)

@pytest.mark.parametrize(
("original_cls", "bidirectional"),
Expand Down
81 changes: 77 additions & 4 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tox]
envlist=
pre-commit-all
py312-torch27-unit
py312-torch27-{unit,gpu}
skipsdist = True
toxworkdir = /tmp/{env:USER}-modelopt-tox

Expand All @@ -11,6 +11,9 @@ toxworkdir = /tmp/{env:USER}-modelopt-tox
############################
[testenv:{py39,py310,py311,py312}-torch{24,25,26,27}-unit]
deps =
# Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
py312: onnxsim

# torch version auto-selected based on torchvision version
torch24: torchvision~=0.19.0
torch25: torchvision~=0.20.0
Expand All @@ -22,6 +25,53 @@ commands =
python -m pytest tests/unit --cov


#####################################################################
# Environment to run unit tests with subset of dependencies installed
#
# The last factor of the environment name (onnx, torch or torch_deploy)
# selects both which extras are installed and which part of tests/unit runs.
#####################################################################
[testenv:{py39,py310,py311,py312}-ext-unit-{onnx,torch,torch_deploy}]
# External executables that this environment is allowed to invoke
allowlist_externals =
bash, rm
deps =
# Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
py312: onnxsim

# ONNX unit tests heavily rely on torch / torchvision
onnx: .[onnx,dev-test]
onnx: torchvision

# Install megatron-core to test torch-only install can still import plugins
torch: megatron-core
torch: .[dev-test]

# Deploy tests are installed with both the onnx and torch extras
torch_deploy: .[onnx,torch,dev-test]
# The torch factor skips tests/unit/torch/deploy; that directory is run by the
# torch_deploy factor instead.
commands =
onnx: python -m pytest tests/unit/onnx
torch: python -m pytest tests/unit/torch --ignore tests/unit/torch/deploy
torch_deploy: python -m pytest tests/unit/torch/deploy


########################################################
# GPU test environments (Can be used with --current-env)
#
# All dependencies are installed through explicit pip commands in commands_pre,
# and the tests under tests/gpu are then run with pytest.
########################################################
[testenv:{py39,py310,py311,py312}-cuda12-gpu]
commands_pre =
# Install deps here so that it gets installed even in --current-env
pip install -U megatron-core
pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git

# Install Eagle-3 test dependencies
pip install tiktoken blobfile sentencepiece

# Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
py312: pip install onnxsim

# NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env
# to avoid possible CUDA version mismatch
# Editable install of this project with the "all" and "dev-test" extras
pip install -e .[all,dev-test]
commands =
# Coverage fails with "Can't combine line data with arc data" error so not using "--cov"
python -m pytest tests/gpu

#############################################
# Code quality checks on all files or on diff
#############################################
Expand All @@ -33,9 +83,9 @@ commands =
diff: pre-commit run --from-ref origin/main --to-ref HEAD {posargs}


#####################
# Documentation build
#####################
#########################
# Run documentation build
#########################
[testenv:{build,debug}-docs]
allowlist_externals =
rm
Expand All @@ -50,3 +100,26 @@ commands_pre =
commands =
sphinx-build source build/html --fail-on-warning --show-traceback --keep-going
debug: sphinx-autobuild source build/html --host 0.0.0.0


#################
# Run wheel build
#
# Builds a wheel into dist/, validates it with twine, then installs it and
# imports the package as a smoke test.
#################
[testenv:build-wheel]
# External executables that this environment is allowed to invoke
allowlist_externals =
bash, cd, rm
# Forward this variable from the calling environment into the tox environment
# (presumably to override the version computed by setuptools-scm - confirm)
passenv =
SETUPTOOLS_SCM_PRETEND_VERSION
deps =
twine
commands =
# Clean build directory to avoid any stale files getting into the wheel
rm -rf build

# Build and check wheel
# --no-deps builds only this project's wheel, not wheels for its dependencies
pip wheel --no-deps --wheel-dir=dist .
twine check dist/*

# Install and test the wheel
# -f dist lets pip also look in the local dist directory for packages
bash -c "find dist -name 'nvidia_modelopt-*.whl' | xargs pip install -f dist"
# The import runs from inside dist, presumably so the installed wheel is
# imported rather than the source tree in the repository root - confirm
bash -c "cd dist; python -c 'import modelopt; print(modelopt.__version__);'"
Loading