cleanup

kevalmorabia97 · kevalmorabia97 · commit d44fb7e603f2 · 2025-09-18T08:05:04.000-07:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
@@ -15,7 +15,7 @@ concurrency:
 jobs:
   code-quality:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml
@@ -0,0 +1,100 @@
+# NOTE: Make sure this file is consistent with .gitlab/tests.yml
+name: E2E Example tests
+
+on:
+  push:
+    branches: ["pull-request/[0-9]+"] # Branches named pull-request/<number>
+    # NOTE: `paths` cannot be used since the push happens to a copy of the PR and only the latest commit to the PR is used; relevant file changes are detected in the check-file-changes job instead
+  schedule:
+    - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
+
+# Cancel in-progress runs if a new commit is pushed to the same PR branch; all other refs are grouped by commit SHA
+concurrency:
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  check-file-changes: # PR branches only: reports via `any_changed` whether files relevant to the example tests changed
+    if: startsWith(github.ref, 'refs/heads/pull-request/')
+    runs-on: ubuntu-latest
+    outputs:
+      any_changed: ${{ steps.changed-tests.outputs.any_changed }} # Compared against 'true' by the downstream jobs
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # Full history; presumably required so that git merge-base below can see both commits
+      - id: get-pr-info
+        uses: nv-gha-runners/get-pr-info@main # Provides the pr-info JSON (head / base SHAs) used below
+      # Compute the merge base of the PR's base and head commits to use as the base for the changed-files comparison
+      - id: calculate-merge-base
+        env:
+          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+        run: |
+          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
+      - name: Check for changes in test-relevant directories
+        id: changed-tests
+        uses: step-security/changed-files@v46.0.5
+        with:
+          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
+          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          files: |
+            .github/workflows/example_tests.yml
+            examples/llm_ptq/**
+            modelopt/torch/**
+            tests/examples/llm_ptq/**
+            setup.py
+          fail_on_initial_diff_error: true
+  wait-checks: # Calls the reusable _wait_for_checks.yml workflow; runs only when relevant files changed
+    needs: [check-file-changes]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    uses: ./.github/workflows/_wait_for_checks.yml
+    permissions:
+      checks: read # Read-only access to check runs
+    secrets: inherit
+    with:
+      match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
+      delay: 300s # NOTE(review): semantics are defined in _wait_for_checks.yml - presumably the wait before / between polls, confirm
+  example-tests-pr: # GPU example tests for PRs; runs only if relevant files changed and the awaited checks succeeded
+    needs: [check-file-changes, wait-checks]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: &example_container
+      image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
+      env: # NOTE: values are passed to the container literally (no shell expansion), so LD_LIBRARY_PATH is extended in the run step
+        # PATH: "/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+    steps: &example_steps
+      - uses: actions/checkout@v4
+      - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Run example tests # matrix.EXAMPLE is interpolated below since matrix values are not exported as shell variables
+        run: |
+          export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+          pip install ".[all]"
+          find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+          pytest -s tests/examples/${{ matrix.EXAMPLE }}
+  example-tests-non-pr: # Same example tests for non-PR refs (nightly schedule / on-demand dispatch); no changed-files gating
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: *example_container # Reuses the container anchored in example-tests-pr
+    steps: *example_steps # Reuses the steps anchored in example-tests-pr
+  example-pr-required-check: # Folds the PR result into a single job; presumably the one marked as a required check - confirm
+    # always() makes this job run even if example-tests-pr is skipped or failed
+    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
+    needs: [check-file-changes, example-tests-pr]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Required GPU tests did not succeed # Runs if change detection failed, or relevant files changed but example-tests-pr did not succeed
+        if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.example-tests-pr.result != 'success') }}
+        run: exit 1
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
@@ -44,7 +44,6 @@ jobs:
             modelopt/**
             tests/gpu/**
             tox.ini
-            pyproject.toml
             setup.py
           fail_on_initial_diff_error: true
   wait-checks:
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
@@ -23,7 +23,7 @@ permissions:
 jobs:
   build-docs:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -10,7 +10,6 @@ on:
       - ".github/workflows/unit_tests.yml"
       - "modelopt/**"
       - "tests/unit/**"
-      - "pyproject.toml"
       - "setup.py"
       - "tox.ini"
   schedule:
diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml
@@ -49,24 +49,24 @@ example:
   tags: [docker, linux, 2-gpu, sm<89]
   parallel:
     matrix:
-      - TEST: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
+      - EXAMPLE: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
   allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
   before_script:
     - pip install ".[all]" -U
   script:
     # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$TEST" = "diffusers" ]; then pip uninstall -y apex; fi
-    - if [ -f examples/$TEST/requirements.txt ]; then pip install -r examples/$TEST/requirements.txt; fi
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$TEST; else bash tests/examples/test_$TEST.sh; fi
+    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
 
 example-ada:
   extends: example
   timeout: 60m
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - TEST: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
-      - TEST: [onnx_ptq]
+      - EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
+      - EXAMPLE: [onnx_ptq]
         TEST_TYPE: bash
 
 ##### Megatron / NeMo Integration Tests #####
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -4,7 +4,7 @@ ARG PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
 ENV PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
     PIP_NO_CACHE_DIR=off \
     PIP_CONSTRAINT= \
-    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0+PTX"
+    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0+PTX"
 
 RUN apt-get update && \
     apt-get install -y libgl1 && \
diff --git a/tests/examples/speculative_decoding/test_medusa.py b/tests/examples/speculative_decoding/test_medusa.py
@@ -13,21 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import subprocess
 
-import pytest
 from _test_utils.examples.run_command import run_example_command
 
 
-# TODO: Medusa QAT FSDP test hangs if transformers>=4.50
-@pytest.fixture(scope="session", autouse=True)
-def install_transformers_lt_4_50():
-    subprocess.run(
-        ["pip", "install", "transformers<4.50"],
-        check=True,
-    )
-
-
 # fmt: off
 def _run_hf_ptq(model_path, output_dir, qformat):
     run_example_command(