NVIDIA · kevalmorabia97 · Aug 26, 2025 · Aug 26, 2025
@@ -5,6 +5,7 @@ on:
     branches: [main, release/*]
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed to the same PR
 concurrency:

@@ -7,10 +7,11 @@ on:
     # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed to the same PR
 concurrency:
-  group: ${{ github.workflow }}-${{ github.sha }}
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
   cancel-in-progress: true
 
 jobs:
@@ -45,20 +46,27 @@ jobs:
     with:
       match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
       delay: 300s
-  gpu-tests:
+  gpu-tests-pr:
     needs: [check-file-changes, wait-checks]
     if: needs.check-file-changes.outputs.any_changed == 'true'
     # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
-    runs-on: linux-amd64-gpu-h100-latest-1
+    runs-on: linux-amd64-gpu-l4-latest-1
     timeout-minutes: 90
-    container:
+    container: &gpu_container # Anchor; aliased by gpu-tests-non-pr
       image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
         LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
-    steps:
+    steps: &gpu_steps # Anchor; aliased by gpu-tests-non-pr
       - uses: actions/checkout@v4
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Run gpu tests
         run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
+  gpu-tests-non-pr:
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }} # Skip on pull-request/* refs
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    container: *gpu_container # Same container as gpu-tests-pr
+    steps: *gpu_steps # Same steps as gpu-tests-pr
@@ -7,6 +7,7 @@ on:
     branches: [main]
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed
 concurrency:

@@ -22,6 +22,7 @@ on:
       - "tox.ini"
   schedule:
     - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
 
 # Cancel previous runs if new commit is pushed
 concurrency:

diff --git a/tests/unit/torch/utils/test_megatron_preprocess_data.py b/tests/unit/torch/utils/test_megatron_preprocess_data.py
@@ -15,12 +15,15 @@
 
 import json
 import os
+import platform
 from pathlib import Path
 
 import pytest
 from _test_utils.import_helper import skip_if_no_megatron
 
-# Skip the test if megatron is not available
+if platform.system() == "Windows":
+    pytest.skip("Skipping on Windows", allow_module_level=True)
+
 skip_if_no_megatron()
 datasets = pytest.importorskip("datasets")
 _ = pytest.importorskip("transformers")
@@ -52,7 +55,7 @@ def download_and_prepare_minipile_dataset(output_dir: Path) -> Path:
     return jsonl_file
 
 
-def test_megatron_preprocess_data_with_minipile_dataset(tmp_path):
+def test_megatron_preprocess_data_with_minipile_dataset(skip_on_windows, tmp_path):
     """Test megatron_preprocess_data function with nanotron/minipile_100_samples dataset.
 
     This test: