|
name: Cluster Tests (SLURM)
# Dispatches the test tiers to the HPC cluster as SLURM batch jobs and polls
# the scheduler until they have finished.
#
# Prerequisites
# -------------
# A self-hosted GitHub Actions runner must be registered on the cluster login
# node and carry the labels [self-hosted, linux, cluster].  The runner process
# needs `sbatch`, `squeue`, and `sacct` on its PATH.
#
# Tiers 1–3 run for every push to, and every pull request against,
# master/develop, and whenever the workflow is dispatched manually.
# Tier 4 (real acquired-data tests, ≤12 h) runs only for pushes to master.
# To enable Tier 4, set the repository-level Actions variable PW_TEST_DATA_DIR
# to the path of the real-data directory on the cluster.

on:
  push:
    branches: [master, develop]
  pull_request:
    branches: [master, develop]
  workflow_dispatch:

jobs:
  # ---------------------------------------------------------------------------
  # Tiers 1 + 2 + 3: unit tests, template validation, synthetic-data
  # integration tests.  No real acquired data required.
  #
  # Flow: submit both batch jobs -> poll squeue until neither is listed ->
  # read each job's final state from sacct -> upload the JUnit XML files.
  # ---------------------------------------------------------------------------
  tiers-1-3:
    name: Tiers 1–3 (unit + template + integration)
    runs-on: [self-hosted, linux, cluster]
    timeout-minutes: 75

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Create results directory
        run: mkdir -p "$GITHUB_WORKSPACE/test-results"

      - name: Submit tiers 1+2 and 3 to SLURM
        id: submit
        run: |
          COMMON=(
            --parsable
            --chdir="$GITHUB_WORKSPACE"
            --export="ALL,PW_PROJECT_DIR=$GITHUB_WORKSPACE"
          )

          # --parsable prints "jobid" or "jobid;cluster"; keep only the id.
          jid1=$(sbatch "${COMMON[@]}" tools/cluster_tests/tier1_2.sbatch)
          jid1=${jid1%%;*}
          echo "Submitted Tier 1+2: job $jid1"
          # Publish right away so the id is not lost if the next sbatch fails.
          echo "jid1=$jid1" >> "$GITHUB_OUTPUT"

          # afterok alone can leave Tier 3 pending forever when Tier 1+2 fails
          # (dependency never satisfied), which would keep the polling step
          # busy until the job timeout.  --kill-on-invalid-dep makes SLURM
          # cancel the dependent job instead.
          jid2=$(sbatch "${COMMON[@]}" \
            --dependency=afterok:"$jid1" \
            --kill-on-invalid-dep=yes \
            tools/cluster_tests/tier3.sbatch)
          jid2=${jid2%%;*}
          echo "Submitted Tier 3: job $jid2 (depends on $jid1)"
          echo "jid2=$jid2" >> "$GITHUB_OUTPUT"

      - name: Wait for SLURM jobs to finish
        env:
          # Passed through the environment rather than spliced into the script.
          JOB_IDS: ${{ steps.submit.outputs.jid1 }},${{ steps.submit.outputs.jid2 }}
        run: |
          echo "Polling SLURM jobs: $JOB_IDS"
          # The loop ends as soon as squeue lists nothing for these ids (or
          # squeue itself errors out); the next step decides pass/fail.
          while squeue -j "$JOB_IDS" --noheader 2>/dev/null | grep -q .; do
            echo "$(date -u '+%H:%M:%S') UTC — still running …"
            sleep 30
          done
          echo "All jobs finished."

      - name: Check SLURM exit codes
        env:
          JID1: ${{ steps.submit.outputs.jid1 }}
          JID2: ${{ steps.submit.outputs.jid2 }}
        run: |
          failed=0
          for jid in "$JID1" "$JID2"; do
            state=""
            # Accounting records may trail the scheduler, so retry for up to
            # about a minute while sacct reports nothing or a non-final state.
            for attempt in 1 2 3 4 5 6; do
              # -X: report the job allocation only, not its individual steps.
              state=$(sacct -X -j "$jid" --format=State --noheader -P | head -1 | tr -d ' ')
              case "$state" in
                ""|PENDING|RUNNING|COMPLETING) sleep 10 ;;
                *) break ;;
              esac
            done
            echo "Job $jid: $state"
            if [[ "$state" != "COMPLETED" ]]; then
              echo "::error::SLURM job $jid did not complete successfully (state=$state)"
              failed=1
            fi
          done
          exit $failed

      - name: Cancel leftover SLURM jobs
        # Runs when an earlier step failed or the run was cancelled, so that
        # submitted jobs do not keep running against this workspace.
        if: ${{ (failure() || cancelled()) && steps.submit.outputs.jid1 != '' }}
        env:
          JID1: ${{ steps.submit.outputs.jid1 }}
          JID2: ${{ steps.submit.outputs.jid2 }}
        run: |
          for jid in "$JID1" "$JID2"; do
            if [[ -n "$jid" ]]; then
              # Best effort: scancel reports an error for jobs that have
              # already finished, which is not a problem here.
              scancel "$jid" || true
            fi
          done

      - name: Upload JUnit XML results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: cluster-test-results-tier1-3
          path: |
            ${{ github.workspace }}/test-results/tier1_2_*.xml
            ${{ github.workspace }}/test-results/tier3_*.xml

# ---------------------------------------------------------------------------
# Tier 4: real acquired-data tests.
# Runs only for pushes to master, and only after tiers-1-3 has succeeded.
# Requires the repository-level Actions variable PW_TEST_DATA_DIR to point to
# the lab data directory on the cluster.  With the variable empty the sbatch
# job is still submitted; the real_data tests are then expected to be skipped
# without failing (handled outside this file — confirm in tier4.sbatch).
# ---------------------------------------------------------------------------
  tier-4:
    name: Tier 4 (real data, master only)
    if: github.ref == 'refs/heads/master' && github.event_name == 'push'
    needs: tiers-1-3
    runs-on: [self-hosted, linux, cluster]
    timeout-minutes: 780  # 13 h — covers the 12 h walltime + queue wait
    env:
      PW_TEST_DATA_DIR: ${{ vars.PW_TEST_DATA_DIR }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Create results directory
        run: mkdir -p "$GITHUB_WORKSPACE/test-results"

      - name: Submit tier 4 to SLURM
        id: submit
        run: |
          jid=$(sbatch --parsable \
            --chdir="$GITHUB_WORKSPACE" \
            --export="ALL,PW_PROJECT_DIR=$GITHUB_WORKSPACE,PW_TEST_DATA_DIR=${PW_TEST_DATA_DIR:-}" \
            tools/cluster_tests/tier4.sbatch)
          # --parsable prints "jobid" or "jobid;cluster"; keep only the id.
          jid=${jid%%;*}
          echo "Submitted Tier 4: job $jid"
          echo "jid=$jid" >> "$GITHUB_OUTPUT"

      - name: Wait for SLURM job to finish
        env:
          # Passed through the environment rather than spliced into the script.
          JID: ${{ steps.submit.outputs.jid }}
        run: |
          echo "Polling SLURM job: $JID"
          # The loop ends as soon as squeue lists nothing for this id (or
          # squeue itself errors out); the next step decides pass/fail.
          while squeue -j "$JID" --noheader 2>/dev/null | grep -q .; do
            echo "$(date -u '+%H:%M:%S') UTC — still running …"
            sleep 120
          done
          echo "Job finished."

      - name: Check SLURM exit code
        env:
          JID: ${{ steps.submit.outputs.jid }}
        run: |
          state=""
          # Accounting records may trail the scheduler, so retry for up to
          # about a minute while sacct reports nothing or a non-final state.
          for attempt in 1 2 3 4 5 6; do
            # -X: report the job allocation only, not its individual steps.
            state=$(sacct -X -j "$JID" --format=State --noheader -P | head -1 | tr -d ' ')
            case "$state" in
              ""|PENDING|RUNNING|COMPLETING) sleep 10 ;;
              *) break ;;
            esac
          done
          echo "Job $JID: $state"
          if [[ "$state" != "COMPLETED" ]]; then
            echo "::error::Tier 4 SLURM job $JID did not complete successfully (state=$state)"
            exit 1
          fi

      - name: Cancel leftover SLURM job
        # Runs when an earlier step failed or the run was cancelled, so that
        # the submitted job does not keep running against this workspace.
        if: ${{ (failure() || cancelled()) && steps.submit.outputs.jid != '' }}
        env:
          JID: ${{ steps.submit.outputs.jid }}
        run: |
          # Best effort: scancel reports an error for a job that has already
          # finished, which is not a problem here.
          scancel "$JID" || true

      - name: Upload JUnit XML results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: cluster-test-results-tier4
          path: ${{ github.workspace }}/test-results/tier4_*.xml
0 commit comments