# Skip to content
#
# Addressing comments #209
#
# Addressing comments
#
# Addressing comments #209
#
# Workflow file for this run

# NOTE: Make sure this file is consistent with .gitlab/tests.yml
name: GPU tests

# Triggers: pushes to mirrored PR branches, a nightly schedule, and manual dispatch.
on:
  push:
    branches: ["pull-request/[0-9]+"]
    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
  schedule:
    - cron: "0 0 * * *"  # Nightly
  workflow_dispatch:  # On-demand

# Cancel previous runs if new commit is pushed to the same PR.
# For pull-request/* branches the group key is the branch ref, so a newer push
# to the same branch cancels the run in progress. For any other ref the key is
# the commit SHA, so only runs for the same commit share a group.
concurrency:
  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
  cancel-in-progress: true
jobs:
  # Runs only for pull-request/* branches. Exposes `any_changed`, which the
  # downstream PR jobs use to decide whether GPU tests need to run at all.
  check-file-changes:
    if: startsWith(github.ref, 'refs/heads/pull-request/')
    runs-on: ubuntu-latest
    outputs:
      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
    steps:
      - uses: actions/checkout@v4
      - id: get-pr-info
        uses: nv-gha-runners/get-pr-info@main
      - name: Check for changes in test-relevant directories
        id: changed-tests
        # NOTE(review): the action reference was garbled ("[email protected]") in the
        # copy this file was recovered from; restored as changed-files@v46.0.5.
        # Confirm the exact pinned version against the repository history.
        uses: step-security/changed-files@v46.0.5
        with:
          files: |
            .github/workflows/gpu_tests.yml
            modelopt/**
            tests/gpu/**
            tox.ini
            pyproject.toml
            setup.py
          # NOTE(review): this passes the PR base *ref* to an input named
          # base_sha; presumably the action accepts a ref name here. Confirm.
          base_sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}

  # Calls the repository's reusable workflow and is skipped unless relevant
  # files changed. The GPU PR job depends on it.
  wait-checks:
    needs: [check-file-changes]
    if: needs.check-file-changes.outputs.any_changed == 'true'
    uses: ./.github/workflows/_wait_for_checks.yml
    permissions:
      checks: read
    secrets: inherit
    with:
      match_pattern: '^DCO$|^linux$'  # Wait for DCO and Unit tests / linux to pass
      delay: 300s

  # GPU tests for PR branches, gated on relevant file changes and on wait-checks.
  gpu-tests-pr:
    needs: [check-file-changes, wait-checks]
    if: needs.check-file-changes.outputs.any_changed == 'true'
    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
    runs-on: linux-amd64-gpu-l4-latest-1
    timeout-minutes: 90
    # The anchors below are reused verbatim by gpu-tests-non-pr, so the env
    # block lives inside the container definition to be shared along with it.
    container: &gpu_container
      image: nvcr.io/nvidia/pytorch:25.06-py3
      env:
        # NOTE(review): GIT_DEPTH is not an input of actions/checkout; verify
        # that something in this job actually consumes it.
        GIT_DEPTH: 1000  # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
        # NOTE(review): confirm that ${LD_LIBRARY_PATH} is expanded as intended;
        # container env values are presumably passed through without shell expansion.
        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"  # Add libcudnn*.so and libnv*.so to path.
        PIP_CONSTRAINT: ""  # Disable pip constraint for upgrading packages
    steps: &gpu_steps
      - uses: actions/checkout@v4
      - uses: nv-gha-runners/setup-proxy-cache@main
      - name: Run gpu tests
        run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env

  # GPU tests for every non-PR ref (the nightly schedule and manual dispatch).
  # Same container and steps as the PR job, but on an H100 runner and with no
  # dependency on the file-change or wait-checks jobs.
  gpu-tests-non-pr:
    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
    runs-on: linux-amd64-gpu-h100-latest-1
    timeout-minutes: 90
    container: *gpu_container
    steps: *gpu_steps