
Commit 4c36abe

Add llm_ptq PR test, Cleanup dependency installation in modelopt docker (#338)
Signed-off-by: Keval Morabia <[email protected]>
1 parent fb875ac commit 4c36abe

16 files changed: +139, -47 lines

.github/workflows/code_quality.yml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ concurrency:
 jobs:
   code-quality:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
.github/workflows/example_tests.yml

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+# NOTE: Make sure this file is consistent with .gitlab/tests.yml
+name: E2E Example tests
+
+on:
+  push:
+    branches: ["pull-request/[0-9]+"]
+    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
+  schedule:
+    - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
+
+# Cancel previous runs if new commit is pushed to the same PR
+concurrency:
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  check-file-changes:
+    if: startsWith(github.ref, 'refs/heads/pull-request/')
+    runs-on: ubuntu-latest
+    outputs:
+      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - id: get-pr-info
+        uses: nv-gha-runners/get-pr-info@main
+      # Get commit from main branch that is present in the PR to use as base for changed files
+      - id: calculate-merge-base
+        env:
+          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+        run: |
+          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
+      - name: Check for changes in test-relevant directories
+        id: changed-tests
+        uses: step-security/[email protected]
+        with:
+          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
+          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          files: |
+            .github/workflows/example_tests.yml
+            examples/llm_ptq/**
+            modelopt/torch/**
+            tests/examples/llm_ptq/**
+            setup.py
+          fail_on_initial_diff_error: true
+  wait-checks:
+    needs: [check-file-changes]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    uses: ./.github/workflows/_wait_for_checks.yml
+    permissions:
+      checks: read
+    secrets: inherit
+    with:
+      match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
+      delay: 300s
+  example-tests-pr:
+    needs: [check-file-changes, wait-checks]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: &example_container
+      image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
+      env:
+        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
+        # PATH: "/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+    steps: &example_steps
+      - uses: actions/checkout@v4
+      - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Run example tests
+        run: |
+          pip install ".[all,dev-test]"
+          find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+          pytest -s tests/examples/${{ matrix.EXAMPLE }}
+  example-tests-non-pr:
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: *example_container
+    steps: *example_steps
+  example-pr-required-check:
+    # Run even if example-tests-pr is skipped
+    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
+    needs: [check-file-changes, example-tests-pr]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Required GPU tests did not succeed
+        if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.example-tests-pr.result != 'success') }}
+        run: exit 1
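
The `check-file-changes` job diffs the PR against its merge base with the base branch rather than against the current tip of `main`, so commits that landed on `main` after the PR branched off are not counted as changes. A minimal local sketch of the same logic, assuming `BASE_SHA` and `PR_SHA` stand in for the SHAs the workflow reads from `get-pr-info` (the values below are illustrative):

```bash
# Hypothetical refs standing in for the PR's base and head commits.
BASE_SHA="origin/main"
PR_SHA="HEAD"

# The merge base is the last base-branch commit contained in the PR;
# diffing against it isolates the PR's own changes.
MERGE_BASE=$(git merge-base "$BASE_SHA" "$PR_SHA")

# Roughly what the changed-files action computes from base_sha/sha.
git diff --name-only "$MERGE_BASE" "$PR_SHA"
```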

.github/workflows/gpu_tests.yml

Lines changed: 0 additions & 1 deletion
@@ -44,7 +44,6 @@ jobs:
             modelopt/**
             tests/gpu/**
             tox.ini
-            pyproject.toml
             setup.py
           fail_on_initial_diff_error: true
   wait-checks:

.github/workflows/pages.yml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ permissions:
 jobs:
   build-docs:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5

.github/workflows/unit_tests.yml

Lines changed: 0 additions & 1 deletion
@@ -10,7 +10,6 @@ on:
       - ".github/workflows/unit_tests.yml"
       - "modelopt/**"
      - "tests/unit/**"
-      - "pyproject.toml"
       - "setup.py"
       - "tox.ini"
   schedule:

.gitlab/tests.yml

Lines changed: 7 additions & 6 deletions
@@ -49,23 +49,24 @@ example:
   tags: [docker, linux, 2-gpu, sm<89]
   parallel:
     matrix:
-      - TEST: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
+      - EXAMPLE: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
   allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
   before_script:
-    - pip install ".[all]" -U
+    - pip install ".[all,dev-test]"
   script:
     # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$TEST" = "diffusers" ]; then pip uninstall -y apex; fi
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$TEST; else bash tests/examples/test_$TEST.sh; fi
+    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
 
 example-ada:
   extends: example
   timeout: 60m
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - TEST: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
-      - TEST: [onnx_ptq]
+      - EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
+      - EXAMPLE: [onnx_ptq]
         TEST_TYPE: bash
 
 ##### Megatron / NeMo Integration Tests #####
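
Both CI configurations now install each example's pinned dependencies with the same find-and-install loop. Pulled out on its own (a sketch, assuming it runs from the repository root with `EXAMPLE` set to one of the matrix values; the `echo` is borrowed from the old Dockerfile loop):

```bash
EXAMPLE=llm_ptq

# `|| exit 1` aborts the while-loop subshell on the first failed install.
# Because a pipeline's exit status is that of its last command (the loop),
# the whole CI line then fails instead of silently continuing.
find "examples/$EXAMPLE" -name "requirements.txt" | while read req_file; do
    echo "Installing from $req_file"
    pip install -r "$req_file" || exit 1
done
```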

README.md

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ To install from source in editable mode with all development dependencies or to
 
 ```bash
 # Clone the Model Optimizer repository
-git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
+git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
 cd TensorRT-Model-Optimizer
 
 pip install -e .[dev]

docker/Dockerfile

Lines changed: 5 additions & 12 deletions
@@ -1,10 +1,9 @@
 FROM nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
 
-ARG PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
-ENV PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
+ENV PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com" \
     PIP_NO_CACHE_DIR=off \
     PIP_CONSTRAINT= \
-    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0+PTX"
+    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0+PTX"
 
 RUN apt-get update && \
     apt-get install -y libgl1 && \
@@ -18,17 +17,11 @@ RUN ln -s /app/tensorrt_llm /workspace/tensorrt_llm
 ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" \
     PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
 
-# Install modelopt with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
-RUN pip install -U "nvidia-modelopt[all,dev-test]"
-RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"
-
-# Find and install requirements.txt files for all examples excluding windows
+# Install modelopt from source with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
 COPY . TensorRT-Model-Optimizer
+RUN pip install -e "./TensorRT-Model-Optimizer[all]"
 RUN rm -rf TensorRT-Model-Optimizer/.git
-RUN find TensorRT-Model-Optimizer/examples -name "requirements.txt" | grep -v "windows" | while read req_file; do \
-    echo "Installing from $req_file"; \
-    pip install -r "$req_file" || exit 1; \
-    done
+RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"
 
 # Allow users to run without root
 RUN chmod -R 777 /workspace
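
The revised Dockerfile installs ModelOpt from the local source tree and precompiles the CUDA extensions at build time, so they no longer rebuild on every `docker run`. A hedged usage sketch (the image tag follows the docs below; the `docker run` flags are an assumption, not part of this commit):

```bash
# Build from the repository root with the revised Dockerfile.
docker build -f docker/Dockerfile -t modelopt_examples:latest .

# Start a GPU-enabled shell in the image (flags are illustrative).
docker run --gpus all -it --rm modelopt_examples:latest bash
```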

docs/source/getting_started/_installation_for_Linux.rst

Lines changed: 5 additions & 6 deletions
@@ -34,16 +34,16 @@ Environment setup
 
 To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using our provided docker image
 which is based on the `TensorRT-LLM <https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags>`_
-docker image with additional example-specific dependencies installed.
+docker image with additional dependencies installed.
 
 After installing the `NVIDIA Container Toolkit <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>`_,
-please run the following commands to build the Model Optimizer docker container which has all the necessary
-dependencies pre-installed for running the examples.
+please run the following commands to build the Model Optimizer docker container which has all the base
+dependencies pre-installed. You may need to install additional dependencies from the examples's `requirements.txt` file.
 
 .. code-block:: shell
 
     # Clone the ModelOpt repository
-    git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
+    git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
     cd TensorRT-Model-Optimizer
 
     # Build the docker (will be tagged `docker.io/library/modelopt_examples:latest`)
@@ -60,8 +60,7 @@ Environment setup
 
 For PyTorch, you can also use `NVIDIA NGC PyTorch container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags>`_
 and for NVIDIA NeMo framework, you can use the `NeMo container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags>`_.
-Both of these containers come with Model Optimizer pre-installed. NeMo container also comes with the HuggingFace and TensorRT-LLM
-dependencies. Make sure to update the Model Optimizer to the latest version if not already.
+Both of these containers come with Model Optimizer pre-installed. Make sure to update the Model Optimizer to the latest version if not already.
 
 For ONNX PTQ, you can use the optimized docker image from [onnx_ptq Dockerfile](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/onnx_ptq/docker).

examples/cnn_qat/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+torchvision
