diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
new file mode 100644
index 000000000..327e6633c
--- /dev/null
+++ b/.github/workflows/gpu_tests.yml
@@ -0,0 +1,42 @@
+name: GPU tests
+
+# Runs for pushes to branches matching pull-request/<number> that touch the paths below.
+on:
+  push:
+    branches: ["pull-request/[0-9]+"]
+    paths:
+      - ".github/workflows/gpu_tests.yml"
+      - "modelopt/**"
+      - "tests/**"
+      - "setup.py"
+      - "tox.ini"
+
+# Cancel previous runs if new commit is pushed to the same PR.
+# This workflow is triggered by `push`, where github.event.pull_request is not populated,
+# so the group is keyed on the pushed ref (presumably one pull-request/<number> branch per PR).
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  gpu-tests:
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 60
+    container:
+      image: nvcr.io/nvidia/pytorch:25.04-py3
+      env:
+        # NOTE(review): actions/checkout takes its clone depth from `with: fetch-depth` - confirm GIT_DEPTH has an effect here
+        GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+    steps:
+      - uses: actions/checkout@v4
+      # container.env values are passed through literally (no shell expansion), so the image's
+      # LD_LIBRARY_PATH is extended from a shell step instead. Adds libcudnn*.so and libnv*.so to path.
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
+      - name: Install dependencies
+        run: pip install tox-current-env
+      - name: Run gpu tests
+        run: tox -e py312-cuda12-gpu --current-env
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 181225a25..b279a2764 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -24,10 +24,7 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
-      # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
       - name: Install dependencies
-        run: |
-          pip install onnxsim
-          pip install tox
+        run: pip install tox
       - name: Run unit tests
         run: tox -e py312-torch27-unit
diff --git a/tests/unit/torch/quantization/test_quant_rnn.py b/tests/unit/torch/quantization/test_quant_rnn.py
index b297028b7..284ba4c70 100644
--- a/tests/unit/torch/quantization/test_quant_rnn.py
+++ b/tests/unit/torch/quantization/test_quant_rnn.py
@@ -211,7 +211,7 @@ def test_fake_quant_per_channel(self, original_cls, bidirectional):
 
         out1 = quant_rnn_object(test_input)[0]
         out2 = rnn_object_original(test_input)[0]
-        assert torch.allclose(out1, out2)
+        assert torch.allclose(out1, out2, atol=1e-5)
 
     @pytest.mark.parametrize(
         ("original_cls", "bidirectional"),
diff --git a/tox.ini b/tox.ini
index e5fd614e3..aa5223938 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,7 @@
 [tox]
 envlist=
     pre-commit-all
-    py312-torch27-unit
+    py312-{torch27-unit,cuda12-gpu}
 skipsdist = True
 toxworkdir = /tmp/{env:USER}-modelopt-tox
 
@@ -11,6 +11,9 @@ toxworkdir = /tmp/{env:USER}-modelopt-tox
 ############################
 [testenv:{py39,py310,py311,py312}-torch{24,25,26,27}-unit]
 deps =
+    # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
+    py312: onnxsim
+
     # torch version auto-selected based on torchvision version
     torch24: torchvision~=0.19.0
     torch25: torchvision~=0.20.0
@@ -22,6 +25,53 @@ commands =
     python -m pytest tests/unit --cov
 
 
+#####################################################################
+# Environments that run unit tests with only a subset of dependencies
+#####################################################################
+[testenv:{py39,py310,py311,py312}-ext-unit-{onnx,torch,torch_deploy}]
+allowlist_externals =
+    bash, rm
+deps =
+    # Python 3.12 only: onnxsim is built from its sdist until https://github.com/daquexian/onnx-simplifier/pull/353 lands
+    py312: onnxsim
+
+    # The ONNX unit tests rely heavily on torch / torchvision, so torchvision is installed alongside the onnx extra
+    onnx: .[onnx,dev-test]
+    onnx: torchvision
+
+    # megatron-core is installed to check that a torch-only install can still import the plugins
+    torch: megatron-core
+    torch: .[dev-test]
+
+    torch_deploy: .[onnx,torch,dev-test]
+commands =
+    onnx: python -m pytest tests/unit/onnx
+    torch: python -m pytest tests/unit/torch --ignore tests/unit/torch/deploy
+    torch_deploy: python -m pytest tests/unit/torch/deploy
+
+
+########################################################
+# GPU test environments (can be used with --current-env)
+########################################################
+[testenv:{py39,py310,py311,py312}-cuda12-gpu]
+commands_pre =
+    # Dependencies are installed here (in commands_pre) so that they get installed even with --current-env
+    pip install -U megatron-core
+    pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git
+
+    # Dependencies needed by the Eagle-3 tests
+    pip install tiktoken blobfile sentencepiece
+
+    # Python 3.12 only: onnxsim is built from its sdist until https://github.com/daquexian/onnx-simplifier/pull/353 lands
+    py312: pip install onnxsim
+
+    # NOTE: When using --current-env, the user is expected to have the correct torch-cuda version
+    #   pre-installed to avoid a possible CUDA version mismatch
+    pip install -e .[all,dev-test]
+commands =
+    # "--cov" is not used because coverage fails with a "Can't combine line data with arc data" error
+    python -m pytest tests/gpu
+
 #############################################
 # Code quality checks on all files or on diff
 #############################################
@@ -33,9 +83,9 @@ commands =
     diff: pre-commit run --from-ref origin/main --to-ref HEAD {posargs}
 
 
-#####################
-# Documentation build
-#####################
+#########################
+# Run documentation build
+#########################
 [testenv:{build,debug}-docs]
 allowlist_externals =
     rm
@@ -50,3 +100,26 @@ commands_pre =
 commands =
     sphinx-build source build/html --fail-on-warning --show-traceback --keep-going
     debug: sphinx-autobuild source build/html --host 0.0.0.0
+
+
+#################
+# Run wheel build
+#################
+[testenv:build-wheel]
+allowlist_externals =
+    bash, cd, rm
+passenv =
+    SETUPTOOLS_SCM_PRETEND_VERSION
+deps =
+    twine
+commands =
+    # Remove the build directory so that no stale files end up in the wheel
+    rm -rf build
+
+    # Build the wheel into dist/ and validate it with twine
+    pip wheel --no-deps --wheel-dir=dist .
+    twine check dist/*
+
+    # Install the built wheel, then import modelopt from inside dist/ and print its version
+    bash -c "find dist -name 'nvidia_modelopt-*.whl' | xargs pip install -f dist"
+    bash -c "cd dist; python -c 'import modelopt; print(modelopt.__version__);'"