Deprecate ModelOpt custom docker and directly use TRT-LLM docker #329

Workflow file for this run

.github/workflows/gpu_tests.yml at e8b201f

	# NOTE: Make sure this file is consistent with .gitlab/tests.yml
	name: GPU tests

	on:
	push:
	branches: ["pull-request/[0-9]+"]
	# NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
	schedule:
	- cron: "0 0 * * *" # Nightly
	workflow_dispatch: # On-demand

	# Cancel previous runs if new commit is pushed to the same PR
	concurrency:
	group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref \|\| github.sha }}
	cancel-in-progress: true

	jobs:
	check-file-changes:
	if: startsWith(github.ref, 'refs/heads/pull-request/')
	runs-on: ubuntu-latest
	outputs:
	any_changed: ${{ steps.changed-tests.outputs.any_changed }}
	steps:
	- uses: actions/checkout@v4
	with:
	fetch-depth: 0
	- id: get-pr-info
	uses: nv-gha-runners/get-pr-info@main
	# Get commit from main branch that is present in the PR to use as base for changed files
	- id: calculate-merge-base
	env:
	PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
	BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
	run: \|
	(echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") \| tee --append "${GITHUB_OUTPUT}"
	- name: Check for changes in test-relevant directories
	id: changed-tests
	uses: step-security/[email protected]
	with:
	base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
	sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
	files: \|
	.github/workflows/gpu_tests.yml
	modelopt/**
	tests/gpu/**
	tox.ini
	setup.py
	fail_on_initial_diff_error: true
	wait-checks:
	needs: [check-file-changes]
	if: needs.check-file-changes.outputs.any_changed == 'true'
	uses: ./.github/workflows/_wait_for_checks.yml
	permissions:
	checks: read
	secrets: inherit
	with:
	match_pattern: '^DCO$\|^linux$' # Wait for DCO and Unit tests / linux to pass
	delay: 300s
	gpu-tests-pr:
	needs: [check-file-changes, wait-checks]
	if: needs.check-file-changes.outputs.any_changed == 'true'
	# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
	runs-on: linux-amd64-gpu-l4-latest-1
	timeout-minutes: 90
	container: &gpu_container
	image: nvcr.io/nvidia/pytorch:25.06-py3
	env:
	GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
	LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn.so and libnv.so to path.
	PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
	steps: &gpu_steps
	- uses: actions/checkout@v4
	- uses: nv-gha-runners/setup-proxy-cache@main
	- name: Run gpu tests
	run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
	gpu-tests-non-pr:
	if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
	# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
	runs-on: linux-amd64-gpu-h100-latest-1
	timeout-minutes: 90
	container: *gpu_container
	steps: *gpu_steps
	gpu-pr-required-check:
	# Run even if gpu-tests-pr is skipped
	if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
	needs: [check-file-changes, gpu-tests-pr]
	runs-on: ubuntu-latest
	steps:
	- name: Required GPU tests did not succeed
	if: ${{ needs.check-file-changes.result != 'success' \|\| (needs.check-file-changes.outputs.any_changed == 'true' && needs.gpu-tests-pr.result != 'success') }}
	run: exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Deprecate ModelOpt custom docker and directly use TRT-LLM docker #329

Workflow file

Deprecate ModelOpt custom docker and directly use TRT-LLM docker #329

Uh oh!

Jobs

Run details

Workflow file for this run