diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 5c349eb18..fe900000a 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -5,6 +5,7 @@ on:
     branches: [main, release/*]
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed to the same PR
 concurrency:
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
index 04b5ae021..14706be0c 100644
--- a/.github/workflows/gpu_tests.yml
+++ b/.github/workflows/gpu_tests.yml
@@ -7,10 +7,11 @@ on:
     # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed to the same PR
 concurrency:
-  group: ${{ github.workflow }}-${{ github.sha }}
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
   cancel-in-progress: true
 
 jobs:
@@ -45,20 +46,27 @@ jobs:
         with:
           match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
           delay: 300s
-  gpu-tests:
+  gpu-tests-pr:
     needs: [check-file-changes, wait-checks]
     if: needs.check-file-changes.outputs.any_changed == 'true'
     # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
-    runs-on: linux-amd64-gpu-h100-latest-1
+    runs-on: linux-amd64-gpu-l4-latest-1
     timeout-minutes: 90
-    container:
+    container: &gpu_container
       image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
         LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
-    steps:
+    steps: &gpu_steps
       - uses: actions/checkout@v4
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Run gpu tests
         run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
+  gpu-tests-non-pr:
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    container: *gpu_container
+    steps: *gpu_steps
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 7648e1e58..279e3834e 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -7,6 +7,7 @@ on:
     branches: [main]
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed
 concurrency:
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 61ef3f4ea..5e2aece4c 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -22,6 +22,7 @@ on:
       - "tox.ini"
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed
 concurrency:
diff --git a/tests/unit/torch/utils/test_megatron_preprocess_data.py b/tests/unit/torch/utils/test_megatron_preprocess_data.py
index 4709667a5..679d1d8ed 100644
--- a/tests/unit/torch/utils/test_megatron_preprocess_data.py
+++ b/tests/unit/torch/utils/test_megatron_preprocess_data.py
@@ -15,12 +15,15 @@
 
 import json
 import os
+import platform
 from pathlib import Path
 
 import pytest
 from _test_utils.import_helper import skip_if_no_megatron
 
-# Skip the test if megatron is not available
+if platform.system() == "Windows":
+    pytest.skip("Skipping on Windows", allow_module_level=True)
+
 skip_if_no_megatron()
 datasets = pytest.importorskip("datasets")
 _ = pytest.importorskip("transformers")
@@ -52,7 +55,7 @@ def download_and_prepare_minipile_dataset(output_dir: Path) -> Path:
     return jsonl_file
 
 
-def test_megatron_preprocess_data_with_minipile_dataset(tmp_path):
+def test_megatron_preprocess_data_with_minipile_dataset(skip_on_windows, tmp_path):
     """Test megatron_preprocess_data function with nanotron/minipile_100_samples dataset.
 
     This test: