From dc342eb623808a1fbea9fca3e59820b41eae4f31 Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Mon, 11 Aug 2025 21:13:40 +0530 Subject: [PATCH 1/4] Cleanup CI Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/codecov.yml | 8 +++ .github/workflows/_wait_for_checks.yml | 29 ++++++++++ .github/workflows/gpu_tests.yml | 19 +++++- .../workflows/multi_version_unit_tests.yml | 46 --------------- .github/workflows/partial_unit_tests.yml | 32 ---------- .github/workflows/unit_tests.yml | 58 ++++++++++++++++++- 6 files changed, 111 insertions(+), 81 deletions(-) create mode 100644 .github/codecov.yml create mode 100644 .github/workflows/_wait_for_checks.yml delete mode 100644 .github/workflows/multi_version_unit_tests.yml delete mode 100644 .github/workflows/partial_unit_tests.yml diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 000000000..fec90abb6 --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,8 @@ +# Allow at most 5% coverage drop from main branch. 
+coverage: + status: + project: + default: + target: auto + threshold: 5% + patch: false diff --git a/.github/workflows/_wait_for_checks.yml b/.github/workflows/_wait_for_checks.yml new file mode 100644 index 000000000..9e28fcaa2 --- /dev/null +++ b/.github/workflows/_wait_for_checks.yml @@ -0,0 +1,29 @@ +name: Wait for checks + +on: + workflow_call: + inputs: + match_pattern: + required: true + type: string + delay: + required: false + type: string + default: 10s + +jobs: + wait: + runs-on: ubuntu-latest + permissions: + checks: read + steps: + - name: Wait for checks (PRs only) + if: github.event_name == 'pull_request' || startsWith(github.ref, 'refs/heads/pull-request/') + uses: poseidon/wait-for-status-checks@v0.6.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + match_pattern: ${{ inputs.match_pattern }} + delay: ${{ inputs.delay }} + - name: No-op for non-PR events + if: github.event_name != 'pull_request' && !startsWith(github.ref, 'refs/heads/pull-request/') + run: echo "Not a pull_request event" diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index 2665a6006..c77a43396 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -14,11 +14,28 @@ on: # Cancel previous runs if new commit is pushed to the same PR concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: + wait-dco: + uses: ./.github/workflows/_wait_for_checks.yml + permissions: + checks: read + secrets: inherit + with: + match_pattern: '^DCO$' + wait-unit-tests: + needs: [wait-dco] + uses: ./.github/workflows/_wait_for_checks.yml + permissions: + checks: read + secrets: inherit + with: + match_pattern: '^linux$' # Unit tests / linux + delay: 60s gpu-tests: + needs: [wait-unit-tests] # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md runs-on: 
linux-amd64-gpu-h100-latest-1 timeout-minutes: 60 diff --git a/.github/workflows/multi_version_unit_tests.yml b/.github/workflows/multi_version_unit_tests.yml deleted file mode 100644 index def4b392b..000000000 --- a/.github/workflows/multi_version_unit_tests.yml +++ /dev/null @@ -1,46 +0,0 @@ -# Run unit tests with older supported Python and Torch versions -name: Multi version tests - -on: - pull_request: - branches: [main, release/*] - paths: - - ".github/workflows/multi_version_unit_tests.yml" - - "modelopt/**" - - "tests/unit/**" - - "pyproject.toml" - - "setup.py" - - "tox.ini" - -# Cancel previous runs if new commit is pushed to the same PR -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - multi-py-unit: - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - py: [10, 11] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.${{ matrix.py }}" - - name: Run unit tests - run: pip install tox && tox -e py3${{ matrix.py }}-torch28-unit - multi-torch-unit: - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - torch: [25, 26, 27] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - name: Run unit tests - run: pip install tox && tox -e py312-torch${{ matrix.torch }}-unit diff --git a/.github/workflows/partial_unit_tests.yml b/.github/workflows/partial_unit_tests.yml deleted file mode 100644 index c3cb5ce18..000000000 --- a/.github/workflows/partial_unit_tests.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Partial install unit tests - -on: - pull_request: - branches: [main, release/*] - paths: - - ".github/workflows/partial_unit_tests.yml" - - "modelopt/**" - - "tests/unit/**" - - "pyproject.toml" - - "setup.py" - - "tox.ini" - -# Cancel previous runs if new commit is pushed to the same PR -concurrency: - group: ${{ github.workflow }}-${{ 
github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - partial-unit: - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - matrix: - test-env: [onnx, torch] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - name: Run unit tests - run: pip install tox && tox -e py312-partial-unit-${{ matrix.test-env }} diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 9fcc6c87b..355baab54 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -26,7 +26,15 @@ concurrency: cancel-in-progress: true jobs: - unit-tests: + wait-dco: + uses: ./.github/workflows/_wait_for_checks.yml + permissions: + checks: read + secrets: inherit + with: + match_pattern: '^DCO$' + linux: + needs: [wait-dco] runs-on: ubuntu-latest timeout-minutes: 30 steps: @@ -42,8 +50,9 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: true verbose: true - unit-tests-windows: + windows: if: github.event_name == 'pull_request' + needs: [linux] runs-on: windows-latest timeout-minutes: 30 steps: @@ -53,3 +62,48 @@ jobs: python-version: "3.12" - name: Run unit tests (without coverage) run: pip install tox && tox -e py312-torch28-unit + multi-py: + if: github.event_name == 'pull_request' + needs: [linux] + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + matrix: + py: [10, 11] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.${{ matrix.py }}" + - name: Run unit tests + run: pip install tox && tox -e py3${{ matrix.py }}-torch28-unit + multi-torch: + if: github.event_name == 'pull_request' + needs: [linux] + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + matrix: + torch: [25, 26, 27] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Run unit tests + run: pip install tox && tox -e py312-torch${{ matrix.torch }}-unit + 
partial-install: + if: github.event_name == 'pull_request' + needs: [linux] + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + matrix: + test-env: [onnx, torch] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Run unit tests + run: pip install tox && tox -e py312-partial-unit-${{ matrix.test-env }} From db23e00325d1724992f1a57cfd9d73f18e304d08 Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Mon, 11 Aug 2025 22:04:01 +0530 Subject: [PATCH 2/4] Run gpu tests if relevant files are changed Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/workflows/gpu_tests.yml | 37 +++++++++++++++++++++----------- .github/workflows/pages.yml | 2 +- .github/workflows/unit_tests.yml | 2 +- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index c77a43396..3eca9cc5f 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -3,18 +3,11 @@ name: GPU tests on: push: branches: ["pull-request/[0-9]+"] - # TODO: paths cannot be used since push happens to copied PR and only latest commit to PR is used - # paths: - # - ".github/workflows/gpu_tests.yml" - # - "modelopt/**" - # - "tests/gpu/**" - # - "pyproject.toml" - # - "setup.py" - # - "tox.ini" + # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used # Cancel previous runs if new commit is pushed to the same PR concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.sha }} cancel-in-progress: true jobs: @@ -25,6 +18,26 @@ jobs: secrets: inherit with: match_pattern: '^DCO$' + check-changes: + needs: [wait-dco] + runs-on: ubuntu-latest + outputs: + any_changed: ${{ steps.changed-tests.outputs.any_changed }} + steps: + - id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + 
- name: Check for changes in test-relevant directories + id: changed-tests + uses: step-security/changed-files@v46.0.5 + with: + files: | + .github/workflows/gpu_tests.yml + modelopt/** + tests/gpu/** + tox.ini + pyproject.toml + setup.py + base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }} wait-unit-tests: needs: [wait-dco] uses: ./.github/workflows/_wait_for_checks.yml @@ -35,7 +48,8 @@ jobs: match_pattern: '^linux$' # Unit tests / linux delay: 60s gpu-tests: - needs: [wait-unit-tests] + needs: [check-changes, wait-unit-tests] + if: needs.check-changes.outputs.any_changed == 'true' # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md runs-on: linux-amd64-gpu-h100-latest-1 timeout-minutes: 60 @@ -47,7 +61,6 @@ jobs: PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages steps: - uses: actions/checkout@v4 - - name: Setup proxy cache - uses: nv-gha-runners/setup-proxy-cache@main + - uses: nv-gha-runners/setup-proxy-cache@main - name: Run gpu tests run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 7b76bf75c..ba7355c57 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -8,7 +8,7 @@ on: # Cancel previous runs if new commit is pushed concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 355baab54..34f0124c7 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -22,7 +22,7 @@ on: # Cancel previous runs if new commit is pushed concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || 
github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true jobs: From 9834826aa6ade3ff5601729a0d483a6eea1b7288 Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Mon, 11 Aug 2025 22:15:39 +0530 Subject: [PATCH 3/4] Combine DCO and unit-test waits into one job gated on file changes Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/workflows/gpu_tests.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index 3eca9cc5f..c3726609f 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -11,19 +11,12 @@ concurrency: cancel-in-progress: true jobs: - wait-dco: - uses: ./.github/workflows/_wait_for_checks.yml - permissions: - checks: read - secrets: inherit - with: - match_pattern: '^DCO$' check-changes: - needs: [wait-dco] runs-on: ubuntu-latest outputs: any_changed: ${{ steps.changed-tests.outputs.any_changed }} steps: + - uses: actions/checkout@v4 - id: get-pr-info uses: nv-gha-runners/get-pr-info@main - name: Check for changes in test-relevant directories @@ -38,17 +31,18 @@ jobs: pyproject.toml setup.py base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }} - wait-unit-tests: - needs: [wait-dco] + wait-checks: + needs: [check-changes] + if: needs.check-changes.outputs.any_changed == 'true' uses: ./.github/workflows/_wait_for_checks.yml permissions: checks: read secrets: inherit with: - match_pattern: '^linux$' # Unit tests / linux - delay: 60s + match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass + delay: 300s gpu-tests: - needs: [check-changes, wait-unit-tests] + needs: [check-changes, wait-checks] if: needs.check-changes.outputs.any_changed == 'true' # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md runs-on: 
linux-amd64-gpu-h100-latest-1 From 660c3d9d7067ce4151dc48636f50ac805f1ceb0d Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Mon, 11 Aug 2025 22:51:16 +0530 Subject: [PATCH 4/4] Rename check-changes to check-file-changes and wait-dco to check-dco Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/workflows/gpu_tests.yml | 10 +++++----- .github/workflows/unit_tests.yml | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index c3726609f..b9b2ce540 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -11,7 +11,7 @@ concurrency: cancel-in-progress: true jobs: - check-changes: + check-file-changes: runs-on: ubuntu-latest outputs: any_changed: ${{ steps.changed-tests.outputs.any_changed }} @@ -32,8 +32,8 @@ jobs: setup.py base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }} wait-checks: - needs: [check-changes] - if: needs.check-changes.outputs.any_changed == 'true' + needs: [check-file-changes] + if: needs.check-file-changes.outputs.any_changed == 'true' uses: ./.github/workflows/_wait_for_checks.yml permissions: checks: read @@ -42,8 +42,8 @@ jobs: match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass delay: 300s gpu-tests: - needs: [check-changes, wait-checks] - if: needs.check-changes.outputs.any_changed == 'true' + needs: [check-file-changes, wait-checks] + if: needs.check-file-changes.outputs.any_changed == 'true' # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md runs-on: linux-amd64-gpu-h100-latest-1 timeout-minutes: 60 diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 34f0124c7..de3568158 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -26,7 +26,7 @@ concurrency: cancel-in-progress: true jobs: - wait-dco: + check-dco: uses: 
./.github/workflows/_wait_for_checks.yml permissions: checks: read @@ -34,7 +34,7 @@ jobs: with: match_pattern: '^DCO$' linux: - needs: [wait-dco] + needs: [check-dco] runs-on: ubuntu-latest timeout-minutes: 30 steps: