|
7 | 7 | # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
|
8 | 8 | schedule:
|
9 | 9 | - cron: "0 0 * * *" # Nightly
|
| 10 | + workflow_dispatch: # On-demand |
10 | 11 |
|
11 | 12 | # Cancel previous runs if new commit is pushed to the same PR
|
12 | 13 | concurrency:
|
13 |
| - group: ${{ github.workflow }}-${{ github.sha }} |
| 14 | + group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }} |
14 | 15 | cancel-in-progress: true
|
15 | 16 |
|
16 | 17 | jobs:
|
@@ -45,20 +46,27 @@ jobs:
|
45 | 46 | with:
|
46 | 47 | match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
|
47 | 48 | delay: 300s
|
48 |
| - gpu-tests: |
| 49 | + gpu-tests-pr: |
49 | 50 | needs: [check-file-changes, wait-checks]
|
50 | 51 | if: needs.check-file-changes.outputs.any_changed == 'true'
|
51 | 52 | # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
|
52 |
| - runs-on: linux-amd64-gpu-h100-latest-1 |
| 53 | + runs-on: linux-amd64-gpu-l4-latest-1 |
53 | 54 | timeout-minutes: 90
|
54 |
| - container: |
| 55 | + container: &gpu_container |
55 | 56 | image: nvcr.io/nvidia/pytorch:25.06-py3
|
56 | 57 | env:
|
57 | 58 | GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
|
58 | 59 | LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
|
59 | 60 | PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
|
60 |
| - steps: |
| 61 | + steps: &gpu_steps |
61 | 62 | - uses: actions/checkout@v4
|
62 | 63 | - uses: nv-gha-runners/setup-proxy-cache@main
|
63 | 64 | - name: Run gpu tests
|
64 | 65 | run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
|
| 66 | + gpu-tests-non-pr: |
| 67 | + if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }} |
| 68 | + # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md |
| 69 | + runs-on: linux-amd64-gpu-h100-latest-1 |
| 70 | + timeout-minutes: 90 |
| 71 | + container: *gpu_container |
| 72 | + steps: *gpu_steps |
0 commit comments