Skip to content

Commit 90bcf3a

Browse files
Fix GPU test CI condition to run nightly or on demand (#269)
Signed-off-by: Keval Morabia <[email protected]>
1 parent b38dfb6 commit 90bcf3a

File tree

5 files changed

+21
-7
lines changed

5 files changed

+21
-7
lines changed

.github/workflows/code_quality.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ on:
55
branches: [main, release/*]
66
schedule:
77
- cron: "0 0 * * *" # Nightly
8+
workflow_dispatch: # On-demand
89

910
# Cancel previous runs if new commit is pushed to the same PR
1011
concurrency:

.github/workflows/gpu_tests.yml

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,11 @@ on:
77
# NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
88
schedule:
99
- cron: "0 0 * * *" # Nightly
10+
workflow_dispatch: # On-demand
1011

1112
# Cancel previous runs if new commit is pushed to the same PR
1213
concurrency:
13-
group: ${{ github.workflow }}-${{ github.sha }}
14+
group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
1415
cancel-in-progress: true
1516

1617
jobs:
@@ -45,20 +46,27 @@ jobs:
4546
with:
4647
match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
4748
delay: 300s
48-
gpu-tests:
49+
gpu-tests-pr:
4950
needs: [check-file-changes, wait-checks]
5051
if: needs.check-file-changes.outputs.any_changed == 'true'
5152
# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
52-
runs-on: linux-amd64-gpu-h100-latest-1
53+
runs-on: linux-amd64-gpu-l4-latest-1
5354
timeout-minutes: 90
54-
container:
55+
container: &gpu_container
5556
image: nvcr.io/nvidia/pytorch:25.06-py3
5657
env:
5758
GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
5859
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
5960
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
60-
steps:
61+
steps: &gpu_steps
6162
- uses: actions/checkout@v4
6263
- uses: nv-gha-runners/setup-proxy-cache@main
6364
- name: Run gpu tests
6465
run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
66+
gpu-tests-non-pr:
67+
if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
68+
# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
69+
runs-on: linux-amd64-gpu-h100-latest-1
70+
timeout-minutes: 90
71+
container: *gpu_container
72+
steps: *gpu_steps

.github/workflows/pages.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ on:
77
branches: [main]
88
schedule:
99
- cron: "0 0 * * *" # Nightly
10+
workflow_dispatch: # On-demand
1011

1112
# Cancel previous runs if new commit is pushed
1213
concurrency:

.github/workflows/unit_tests.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ on:
2222
- "tox.ini"
2323
schedule:
2424
- cron: "0 0 * * *" # Nightly
25+
workflow_dispatch: # On-demand
2526

2627
# Cancel previous runs if new commit is pushed
2728
concurrency:

tests/unit/torch/utils/test_megatron_preprocess_data.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,15 @@
1515

1616
import json
1717
import os
18+
import platform
1819
from pathlib import Path
1920

2021
import pytest
2122
from _test_utils.import_helper import skip_if_no_megatron
2223

23-
# Skip the test if megatron is not available
24+
if platform.system() == "Windows":
25+
pytest.skip("Skipping on Windows", allow_module_level=True)
26+
2427
skip_if_no_megatron()
2528
datasets = pytest.importorskip("datasets")
2629
_ = pytest.importorskip("transformers")
@@ -52,7 +55,7 @@ def download_and_prepare_minipile_dataset(output_dir: Path) -> Path:
5255
return jsonl_file
5356

5457

55-
def test_megatron_preprocess_data_with_minipile_dataset(tmp_path):
58+
def test_megatron_preprocess_data_with_minipile_dataset(skip_on_windows, tmp_path):
5659
"""Test megatron_preprocess_data function with nanotron/minipile_100_samples dataset.
5760
5861
This test:

0 commit comments

Comments
 (0)