gcunhase
diff --git a/‎.github/workflows/_example_tests_runner.yml‎
Lines changed: 64 additions & 0 deletions b/‎.github/workflows/_example_tests_runner.yml‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎.github/workflows/example_tests.yml‎
Lines changed: 99 additions & 36 deletions b/‎.github/workflows/example_tests.yml‎
Lines changed: 99 additions & 36 deletions
diff --git a/‎.gitlab/release.yml‎
Lines changed: 0 additions & 2 deletions b/‎.gitlab/release.yml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎.gitlab/tests.yml‎
Lines changed: 6 additions & 63 deletions b/‎.gitlab/tests.yml‎
Lines changed: 6 additions & 63 deletions
@@ -0,0 +1,64 @@
+# Callable workflow: installs the package and runs the pytest suite of a single example
+name: Example Tests Runner
+
+on:
+  workflow_call:
+    inputs:
+      docker_image:
+        type: string
+        required: true
+        description: 'Docker image to use for tests'
+      example:
+        type: string
+        required: true
+        description: "Example name to test (e.g. 'llm_ptq')"
+      timeout_minutes:
+        type: number
+        required: false
+        default: 60
+        description: 'Timeout in minutes for the job'
+      pip_install_extras:
+        type: string
+        required: false
+        default: '[all,dev-test]'
+        description: "Pip install extras (e.g. '[hf,dev-test]' or '[all,dev-test]')"
+      runner:
+        type: string
+        required: false
+        default: 'linux-amd64-gpu-h100-latest-1'
+        description: 'GitHub runner to use'
+
+jobs:
+  # Runs in the requested container on the requested runner: installs the package
+  # with the given extras plus the example's own requirements, then runs pytest
+  # on tests/examples/<example>.
+  run-test:
+    runs-on: ${{ inputs.runner }}
+    timeout-minutes: ${{ inputs.timeout_minutes }}
+    container:
+      image: ${{ inputs.docker_image }}
+      env:
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
+          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
+      - name: Install dependencies
+        # Inputs are handed to the script as environment variables rather than
+        # being expanded into the script text, so their content is never parsed as shell code.
+        env:
+          EXAMPLE: ${{ inputs.example }}
+          INSTALL_EXTRAS: ${{ inputs.pip_install_extras }}
+        run: |
+          # Install git-lfs for Daring-Anteater dataset
+          apt-get update && apt-get install -y git-lfs
+          git lfs install --system
+
+          pip install ".${INSTALL_EXTRAS}"
+
+          # POSIX `case` instead of bash-only `[[ ]]`: the pattern match must also work when
+          # the step is executed by plain sh, where `[[` is not a command and the `if` would
+          # silently evaluate to false.
+          case "${EXAMPLE}" in
+            *diffusers*)
+              echo "Uninstalling apex for diffusers: T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391"
+              pip uninstall -y apex || true
+              ;;
+          esac
+
+          # `exit 1` ends the loop's subshell with a failure, which fails the pipeline and the step
+          find "examples/${EXAMPLE}" -name "requirements.txt" | while read -r req_file; do pip install -r "$req_file" || exit 1; done
+      - name: Run tests
+        env:
+          EXAMPLE: ${{ inputs.example }}
+        run: |
+          echo "Running tests for: ${EXAMPLE}"
+          pytest "tests/examples/${EXAMPLE}"
@@ -1,5 +1,4 @@
-# NOTE: Make sure this file is consistent with .gitlab/tests.yml
-name: E2E Example tests
+name: Example tests
 
 on:
   push:
@@ -41,10 +40,10 @@ jobs:
           sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
           files: |
             .github/workflows/example_tests.yml
-            examples/llm_ptq/**
-            modelopt/torch/**
-            tests/examples/llm_ptq/**
+            examples/**
+            modelopt/**
             setup.py
+            tests/examples/**
           fail_on_initial_diff_error: true
   wait-checks:
     needs: [check-file-changes]
@@ -56,46 +55,110 @@ jobs:
     with:
       match_pattern: "^DCO$|^linux$" # Wait for DCO and Unit tests / linux to pass
       delay: 300s
-  example-tests-pr:
+
+  ##### PyTorch Example Tests #####
+  torch-pr: # PR run of the PyTorch examples through the reusable runner workflow
     needs: [check-file-changes, wait-checks]
-    if: needs.check-file-changes.outputs.any_changed == 'true'
-    runs-on: linux-amd64-gpu-h100-latest-1
-    timeout-minutes: 90
+    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' # PR refs only, and only when a watched path changed
     strategy:
+      fail-fast: false # A failing example does not cancel the other matrix entries
       matrix:
-        EXAMPLE: [llm_ptq]
-    container: &example_container
-      image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
-      env:
-        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
-    steps: &example_steps
-      - uses: actions/checkout@v4
-      - uses: nv-gha-runners/setup-proxy-cache@main
-      - name: Setup environment variables
-        run: |
-          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
-          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
-      - name: Run example tests
-        run: |
-          pip install ".[hf,dev-test]"
-          find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
-          pytest -s tests/examples/${{ matrix.EXAMPLE }}
-  example-tests-non-pr:
+        example: [llm_distill, llm_qat, llm_sparsity, speculative_decoding] # One runner-workflow call per example
+    uses: ./.github/workflows/_example_tests_runner.yml
+    secrets: inherit # The runner workflow reads secrets.HF_TOKEN
+    with:
+      docker_image: "nvcr.io/nvidia/pytorch:25.06-py3"
+      example: ${{ matrix.example }}
+      pip_install_extras: "[hf,dev-test]"
+      runner: linux-amd64-gpu-l4-latest-1
+
+  torch-non-pr: # Same PyTorch examples as torch-pr, for refs outside pull-request/*
     if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    runs-on: linux-amd64-gpu-h100-latest-2
-    timeout-minutes: 90
     strategy:
+      fail-fast: false # A failing example does not cancel the other matrix entries
+      matrix:
+        example: [llm_distill, llm_qat, llm_sparsity, speculative_decoding] # One runner-workflow call per example
+    uses: ./.github/workflows/_example_tests_runner.yml
+    secrets: inherit # The runner workflow reads secrets.HF_TOKEN
+    with:
+      docker_image: "nvcr.io/nvidia/pytorch:25.06-py3"
+      example: ${{ matrix.example }}
+      pip_install_extras: "[hf,dev-test]"
+      runner: linux-amd64-gpu-h100-latest-2
+
+  ##### TensorRT-LLM Example Tests #####
+  trtllm-pr: # PR run of the TensorRT-LLM examples through the reusable runner workflow
+    needs: [check-file-changes, wait-checks]
+    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' # PR refs only, and only when a watched path changed
+    strategy:
+      fail-fast: false # A failing example does not cancel the other matrix entries
       matrix:
-        EXAMPLE: [llm_ptq]
-    container: *example_container
-    steps: *example_steps
+        example: [llm_ptq] # PR runs cover llm_ptq only; trtllm-non-pr lists more examples
+    uses: ./.github/workflows/_example_tests_runner.yml
+    secrets: inherit # The runner workflow reads secrets.HF_TOKEN
+    with:
+      docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2"
+      example: ${{ matrix.example }}
+      pip_install_extras: "[hf,dev-test]"
+      runner: linux-amd64-gpu-h100-latest-1
+
+  trtllm-non-pr: # TensorRT-LLM examples for refs outside pull-request/*
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    uses: ./.github/workflows/_example_tests_runner.yml
+    secrets: inherit # The runner workflow reads secrets.HF_TOKEN
+    with:
+      runner: linux-amd64-gpu-h100-latest-2
+      docker_image: 'nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2'
+      pip_install_extras: '[hf,dev-test]'
+      example: ${{ matrix.example }}
+    strategy:
+      fail-fast: false # A failing example does not cancel the other matrix entries
+      matrix:
+        example: [llm_autodeploy, llm_eval, llm_ptq, vlm_ptq]
+
+  ##### ONNX/TensorRT Example Tests #####
+  onnx-pr: # PR run of the ONNX/TensorRT examples through the reusable runner workflow
+    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' }}
+    needs: [check-file-changes, wait-checks]
+    uses: ./.github/workflows/_example_tests_runner.yml
+    secrets: inherit # The runner workflow reads secrets.HF_TOKEN
+    with:
+      runner: linux-amd64-gpu-l4-latest-1
+      docker_image: 'nvcr.io/nvidia/tensorrt:25.08-py3'
+      pip_install_extras: '[all,dev-test]'
+      example: ${{ matrix.example }}
+    strategy:
+      fail-fast: false # A failing example does not cancel the other matrix entries
+      matrix:
+        example: [diffusers]
+
+  onnx-non-pr: # ONNX/TensorRT examples for refs outside pull-request/*
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    uses: ./.github/workflows/_example_tests_runner.yml
+    secrets: inherit # The runner workflow reads secrets.HF_TOKEN
+    with:
+      runner: linux-amd64-gpu-l4-latest-1
+      docker_image: 'nvcr.io/nvidia/tensorrt:25.08-py3'
+      pip_install_extras: '[all,dev-test]'
+      example: ${{ matrix.example }}
+    strategy:
+      fail-fast: false # A failing example does not cancel the other matrix entries
+      matrix:
+        example: [diffusers, onnx_ptq]
+
+  ##### Required Check for PR #####
   example-pr-required-check:
-    # Run even if example-tests-pr is skipped
+    # always() keeps this job running on PR refs even when the example test jobs it needs failed or were skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [check-file-changes, example-tests-pr]
+    needs: [check-file-changes, torch-pr, trtllm-pr, onnx-pr] # Every PR example job, so each result can be inspected below
     runs-on: ubuntu-latest
     steps:
       - name: Required GPU tests did not succeed
-        if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.example-tests-pr.result != 'success') }}
+        if: |
+          needs.check-file-changes.result != 'success' ||
+          (needs.check-file-changes.outputs.any_changed == 'true' && (
+            needs.torch-pr.result != 'success' ||
+            needs.trtllm-pr.result != 'success' ||
+            needs.onnx-pr.result != 'success'
+          ))
         run: exit 1
@@ -10,14 +10,12 @@ build-and-upload-wheels:
     - if: $JET_ONLY != null
       when: never
     - if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
-      when: manual
       variables:
         RELEASE: "true"
         TWINE_USERNAME: svc-dl-algo-ammo
         TWINE_PASSWORD: $ARTIFACTORY_TOKEN # Configured in GitLab > Settings > CI/CD
         REPO_URL: https://urm.nvidia.com/artifactory/api/pypi/sw-dl-algo-ammo-pypi-local
     - if: $CI_PIPELINE_SOURCE == "schedule"
-      when: manual
       variables:
         RELEASE: "false"
         TWINE_USERNAME: gitlab-ci-token
 
@@ -9,79 +9,22 @@
     - if: $CI_PIPELINE_SOURCE == "web" || $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED == "true"
       when: manual
 
-##### Unit Tests #####
-unit:
-  extends: .tests-default
-  timeout: 30m
-  variables:
-    PYTHON: 12
-    TORCH: 29
-    TRANSFORMERS: latest
-  image: python:3.$PYTHON
-  before_script:
-    - pip install tox
-  script:
-    - tox -e py3$PYTHON-torch$TORCH-tf_$TRANSFORMERS-unit
-
-##### GPU Tests #####
-.multi-gpu-tests-default:
+##### Example Tests #####
+example-onnx-bash: # Runs tests/examples/test_<EXAMPLE>.sh for each EXAMPLE in the matrix below
   extends: .tests-default
   timeout: 90m
-  image: nvcr.io/nvidia/pytorch:25.06-py3
-  variables:
-    GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
-  tags: [docker, linux, 2-gpu]
-  before_script:
-    # Add libcudnn*.so and libnv*.so to path
-    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu"
-    # Install git-lfs for Daring-Anteater dataset
-    - apt-get update && apt-get install -y git-lfs
-    - git lfs install --system
-
-multi-gpu:
-  extends: .multi-gpu-tests-default
-  script:
-    # Use pre-installed packages without a new venv with tox-current-env
-    - pip install tox-current-env
-    - tox -e py312-cuda12-gpu --current-env
-
-##### Example Tests #####
-example-torch:
-  extends: .multi-gpu-tests-default
-  timeout: 30m
-  parallel:
-    matrix:
-      - EXAMPLE: [llm_distill, llm_qat, llm_sparsity, speculative_decoding]
-  script:
-    - pip install ".[hf,dev-test]"
-    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
-    - pytest -s tests/examples/$EXAMPLE
-
-example-trtllm:
-  extends: example-torch
-  timeout: 60m
-  image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
-  tags: [docker, linux, 2-gpu, sm>=89]
-  parallel:
-    matrix:
-      - EXAMPLE: [llm_autodeploy, llm_eval, llm_ptq, vlm_ptq]
-
-example-onnx:
-  extends: example-torch
   image: nvcr.io/nvidia/tensorrt:25.08-py3
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - EXAMPLE: [diffusers, onnx_ptq]
-        TEST_TYPE: pytest
       - EXAMPLE: [onnx_ptq]
-        TEST_TYPE: bash
+  before_script:
+    # Add libcudnn*.so and libnv*.so to path
+    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu"
   script:
-    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
     - pip install ".[all,dev-test]"
     - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
+    - bash tests/examples/test_$EXAMPLE.sh # Shell-script test only; the pytest branch was removed from this job
 
 ##### Megatron / NeMo Integration Tests #####
 megatron-nemo-integration: