Skip to content

Deprecate ModelOpt custom docker and directly use TRT-LLM docker #329

Deprecate ModelOpt custom docker and directly use TRT-LLM docker

Deprecate ModelOpt custom docker and directly use TRT-LLM docker #329

Workflow file for this run

# NOTE: Make sure this file is consistent with .gitlab/tests.yml
name: GPU tests

# Triggers: pushes to mirrored PR branches, a nightly schedule, and manual runs.
on:
  push:
    branches: ["pull-request/[0-9]+"]
    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
  schedule:
    - cron: "0 0 * * *" # Nightly
  workflow_dispatch: # On-demand

# Cancel previous runs if new commit is pushed to the same PR.
# PR branches share one group per ref, so a newer push cancels the older run;
# every other run is grouped by its commit SHA.
concurrency:
  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
  cancel-in-progress: true
jobs:
  # PR branches only: decide whether the PR touches any GPU-test-relevant path.
  # The result is exposed as the `any_changed` output for the jobs below.
  check-file-changes:
    if: startsWith(github.ref, 'refs/heads/pull-request/')
    runs-on: ubuntu-latest
    outputs:
      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Full history so `git merge-base` below can resolve both SHAs
      - id: get-pr-info
        uses: nv-gha-runners/get-pr-info@main
      # Get commit from main branch that is present in the PR to use as base for changed files
      - id: calculate-merge-base
        env:
          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
        # Writes `merge-base=<sha>` to the step outputs (and echoes it to the log via tee).
        run: |
          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
      - name: Check for changes in test-relevant directories
        id: changed-tests
        # NOTE(review): the version tag was garbled in the reviewed copy
        # ("[email protected]", an e-mail-obfuscation artifact). The inputs below match
        # step-security/changed-files; v46.0.5 is a best guess - confirm the
        # tag (or pin a commit SHA) before merging.
        uses: step-security/changed-files@v46.0.5
        with:
          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
          files: |
            .github/workflows/gpu_tests.yml
            modelopt/**
            tests/gpu/**
            tox.ini
            setup.py
          fail_on_initial_diff_error: true

  # Runs the reusable wait-for-checks workflow, only when relevant files changed.
  wait-checks:
    needs: [check-file-changes]
    if: needs.check-file-changes.outputs.any_changed == 'true'
    uses: ./.github/workflows/_wait_for_checks.yml
    permissions:
      checks: read
    secrets: inherit
    with:
      match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
      delay: 300s

  # GPU tests for PR branches; skipped when no relevant files changed.
  # Defines the container and steps anchors reused by gpu-tests-non-pr.
  gpu-tests-pr:
    needs: [check-file-changes, wait-checks]
    if: needs.check-file-changes.outputs.any_changed == 'true'
    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
    runs-on: linux-amd64-gpu-l4-latest-1
    timeout-minutes: 90
    container: &gpu_container
      image: nvcr.io/nvidia/pytorch:25.06-py3
      env:
        GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
        # NOTE(review): container env values are presumably passed through
        # without shell expansion - confirm ${LD_LIBRARY_PATH} resolves as intended.
        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
    steps: &gpu_steps
      - uses: actions/checkout@v4
      - uses: nv-gha-runners/setup-proxy-cache@main
      - name: Run gpu tests
        run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env

  # GPU tests for every ref that is not a PR branch.
  # Same container and steps as gpu-tests-pr (via the aliases), different runner.
  gpu-tests-non-pr:
    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
    runs-on: linux-amd64-gpu-h100-latest-1
    timeout-minutes: 90
    container: *gpu_container
    steps: *gpu_steps

  # Single status job for PR branches.
  # It passes when the change check succeeded and either nothing relevant
  # changed or gpu-tests-pr succeeded; otherwise the step below fails the job.
  gpu-pr-required-check:
    # Run even if gpu-tests-pr is skipped
    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
    needs: [check-file-changes, gpu-tests-pr]
    runs-on: ubuntu-latest
    steps:
      - name: Required GPU tests did not succeed
        if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.gpu-tests-pr.result != 'success') }}
        run: exit 1