Skip to content

Commit 4d51e4b

Browse files
committed
deployment and module flexibility
1 parent c9c79b3 commit 4d51e4b

File tree

11 files changed

+1065
-5
lines changed

11 files changed

+1065
-5
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
name: Cluster Tests (SLURM)
# Submits SLURM jobs on the HPC cluster and polls for completion.
#
# Prerequisites
# -------------
# A GitHub Actions self-hosted runner must be registered on the cluster
# login node with the labels [self-hosted, linux, cluster].  The runner
# process must have access to `sbatch`, `squeue`, and `sacct` (i.e. the
# SLURM binaries must be on its PATH).
#
# Triggers
# --------
# Tiers 1–3 run on every push and pull-request to master/develop, and on
# manual dispatch (workflow_dispatch).
# Tier 4 (real acquired-data tests, ≤12 h) runs only on push to master.
# Set the repository-level Actions variable PW_TEST_DATA_DIR to the path
# of the real-data directory on the cluster to enable Tier 4.
#
# NOTE(review): pull_request runs execute on the self-hosted cluster
# runner — confirm that the repository requires approval before workflows
# from fork pull requests are allowed to run.

on:
  push:
    branches:
      - master
      - develop
  pull_request:
    branches:
      - master
      - develop
  workflow_dispatch:

# Least privilege: the jobs only check out the repository, so the
# GITHUB_TOKEN handed to the cluster runner is restricted to read access.
permissions:
  contents: read
26+
27+
jobs:
28+
# ---------------------------------------------------------------------------
# Tiers 1 + 2 + 3: unit tests, template validation, synthetic-data
# integration tests.  No real acquired data required.
#
# Flow: submit two chained SLURM jobs (tier 3 starts only after tier 1+2
# succeeds), poll `squeue` until neither job is listed any more, then read
# the final job states from `sacct` and fail unless both are COMPLETED.
# ---------------------------------------------------------------------------
tiers-1-3:
  name: Tiers 1–3 (unit + template + integration)
  runs-on: [self-hosted, linux, cluster]
  timeout-minutes: 75

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Create results directory
      run: mkdir -p "$GITHUB_WORKSPACE/test-results"

    - name: Submit tiers 1+2 and 3 to SLURM
      id: submit
      run: |
        # Options shared by both submissions.
        COMMON=(
          --parsable
          --chdir="$GITHUB_WORKSPACE"
          --export="ALL,PW_PROJECT_DIR=$GITHUB_WORKSPACE"
        )

        jid1=$(sbatch "${COMMON[@]}" tools/cluster_tests/tier1_2.sbatch)
        # --parsable prints "jobid[;cluster]"; keep only the numeric job id.
        jid1="${jid1%%;*}"
        echo "Submitted Tier 1+2: job $jid1"
        # Record the id right away, before attempting the second submission.
        echo "jid1=$jid1" >> "$GITHUB_OUTPUT"

        # --kill-on-invalid-dep=yes: if tier 1+2 fails, the afterok
        # dependency can never be satisfied.  Without this flag tier 3 may
        # stay pending indefinitely and the polling step below would spin
        # until the job-level timeout.
        jid2=$(sbatch "${COMMON[@]}" \
          --dependency=afterok:"$jid1" \
          --kill-on-invalid-dep=yes \
          tools/cluster_tests/tier3.sbatch)
        jid2="${jid2%%;*}"
        echo "Submitted Tier 3: job $jid2 (depends on $jid1)"
        echo "jid2=$jid2" >> "$GITHUB_OUTPUT"

    - name: Wait for SLURM jobs to finish
      # Job ids are passed through the environment instead of being
      # interpolated into the script text.
      env:
        JID1: ${{ steps.submit.outputs.jid1 }}
        JID2: ${{ steps.submit.outputs.jid2 }}
      run: |
        jobs="$JID1,$JID2"
        echo "Polling SLURM jobs: $jobs"
        # Loop for as long as squeue still lists at least one of the jobs.
        while squeue -j "$jobs" --noheader 2>/dev/null | grep -q .; do
          echo "$(date -u '+%H:%M:%S') UTC — still running …"
          sleep 30
        done
        echo "All jobs finished."

    - name: Check SLURM exit codes
      env:
        JID1: ${{ steps.submit.outputs.jid1 }}
        JID2: ${{ steps.submit.outputs.jid2 }}
      run: |
        failed=0
        for jid in "$JID1" "$JID2"; do
          # -X restricts sacct to the job allocation itself, so the line
          # read here is the job's own state rather than a job step's.
          state=$(sacct -X -j "$jid" --format=State --noheader -P | head -1 | tr -d ' ')
          echo "Job $jid: $state"
          if [[ "$state" != "COMPLETED" ]]; then
            echo "::error::SLURM job $jid did not complete successfully (state=$state)"
            failed=1
          fi
        done
        exit $failed

    - name: Cancel SLURM jobs on failure or cancellation
      # Do not leave jobs queued or running on the cluster once the
      # workflow run has been abandoned.  Requires `scancel` on PATH.
      if: ${{ (failure() || cancelled()) && steps.submit.outputs.jid1 != '' }}
      env:
        JID1: ${{ steps.submit.outputs.jid1 }}
        JID2: ${{ steps.submit.outputs.jid2 }}
      run: |
        for jid in "$JID1" "$JID2"; do
          if [[ -n "$jid" ]]; then
            # Best effort: a job that already ended must not turn this
            # cleanup into an additional failure.
            scancel "$jid" || true
          fi
        done

    - name: Upload JUnit XML results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-test-results-tier1-3
        path: |
          ${{ github.workspace }}/test-results/tier1_2_*.xml
          ${{ github.workspace }}/test-results/tier3_*.xml
95+
96+
# ---------------------------------------------------------------------------
# Tier 4: real acquired-data tests.
# Only runs on push to master, after tiers 1–3 have succeeded.  Requires
# the repository-level Actions variable PW_TEST_DATA_DIR to point to the
# lab data directory on the cluster.  If the variable is empty the sbatch
# job starts but all real_data tests are skipped automatically (no failure).
# ---------------------------------------------------------------------------
tier-4:
  name: Tier 4 (real data, master only)
  if: github.ref == 'refs/heads/master' && github.event_name == 'push'
  needs: tiers-1-3
  runs-on: [self-hosted, linux, cluster]
  timeout-minutes: 780  # 13 h — covers the 12 h walltime + queue wait
  env:
    PW_TEST_DATA_DIR: ${{ vars.PW_TEST_DATA_DIR }}

  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Create results directory
      run: mkdir -p "$GITHUB_WORKSPACE/test-results"

    - name: Submit tier 4 to SLURM
      id: submit
      run: |
        jid=$(sbatch --parsable \
          --chdir="$GITHUB_WORKSPACE" \
          --export="ALL,PW_PROJECT_DIR=$GITHUB_WORKSPACE,PW_TEST_DATA_DIR=${PW_TEST_DATA_DIR:-}" \
          tools/cluster_tests/tier4.sbatch)
        # --parsable prints "jobid[;cluster]"; keep only the numeric job id.
        jid="${jid%%;*}"
        echo "Submitted Tier 4: job $jid"
        echo "jid=$jid" >> "$GITHUB_OUTPUT"

    - name: Wait for SLURM job to finish
      # The job id is passed through the environment instead of being
      # interpolated into the script text.
      env:
        JID: ${{ steps.submit.outputs.jid }}
      run: |
        echo "Polling SLURM job: $JID"
        # Loop for as long as squeue still lists the job.
        while squeue -j "$JID" --noheader 2>/dev/null | grep -q .; do
          echo "$(date -u '+%H:%M:%S') UTC — still running …"
          sleep 120
        done
        echo "Job finished."

    - name: Check SLURM exit code
      env:
        JID: ${{ steps.submit.outputs.jid }}
      run: |
        # -X restricts sacct to the job allocation itself, so the line
        # read here is the job's own state rather than a job step's.
        state=$(sacct -X -j "$JID" --format=State --noheader -P | head -1 | tr -d ' ')
        echo "Job $JID: $state"
        if [[ "$state" != "COMPLETED" ]]; then
          echo "::error::Tier 4 SLURM job $JID did not complete successfully (state=$state)"
          exit 1
        fi

    - name: Cancel SLURM job on failure or cancellation
      # Do not leave a long-running job on the cluster once the workflow
      # run has been abandoned.  Requires `scancel` on PATH.
      if: ${{ (failure() || cancelled()) && steps.submit.outputs.jid != '' }}
      env:
        JID: ${{ steps.submit.outputs.jid }}
      run: |
        # Best effort: a job that already ended must not turn this cleanup
        # into an additional failure.
        scancel "$JID" || true

    - name: Upload JUnit XML results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: cluster-test-results-tier4
        path: ${{ github.workspace }}/test-results/tier4_*.xml

0 commit comments

Comments
 (0)