# CI workflow: installs asimov inside a single-node Slurm container, checks
# that `asimov init` detects the Slurm scheduler, applies the GWOSC test
# blueprints against a local mock gwdatafind server, and confirms that a
# trivial batch job can be submitted and run to completion.
name: Tests with Slurm
on: [push, pull_request]

jobs:
  tests:
    name: "Slurm Testing"
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/natejenkins/slurm-docker-cluster:23.11.7
      # NOTE(review): the hostname presumably has to match the controller name
      # configured inside the image's slurm.conf - confirm before changing it.
      options: --privileged --hostname slurmctld
    defaults:
      run:
        # Login shell that aborts the step on the first failing command.
        shell: bash -el {0}
    steps:

      - uses: actions/checkout@v5
        with:
          fetch-depth: 0

      - name: Install base tools
        run: |
          apt-get update
          apt-get install -y sudo git python3 python3-pip python3-venv curl

      - name: Start Slurm services
        run: |
          # Start munge authentication service
          sudo -u munge /usr/sbin/munged

          # Start Slurm controller
          /usr/sbin/slurmctld

          # Start Slurm daemon
          /usr/sbin/slurmd

          # Poll until the controller answers instead of relying on a fixed
          # sleep; give up after roughly 30 seconds.
          for attempt in {1..30}; do
            if sinfo > /dev/null 2>&1; then
              break
            fi
            sleep 1
          done

          # Verify Slurm is working (fails the step if it never came up)
          sinfo
          squeue

      - name: Create submit user
        run: |
          # Unprivileged account used to run asimov and submit jobs.
          useradd -m -s /bin/bash submituser
          echo "submituser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

      - name: Ensure submituser can access workspace
        run: |
          # The checkout is owned by the container's default user, so open it
          # up for submituser before anything is written below it.
          mkdir -p "$GITHUB_WORKSPACE/test_project"
          chmod -R a+rwX "$GITHUB_WORKSPACE"

      - name: Install Python dependencies
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install -e "$GITHUB_WORKSPACE"
          python3 -m pip install python-crontab

      - name: Install additional packages
        run: |
          python3 -m pip install -U bilby bilby_pipe==1.4.0 pesummary
          # Best effort: a failed gwdata install is reported as a warning
          # annotation but does not fail the job.
          python3 -m pip install git+https://git.ligo.org/asimov/pipelines/gwdata.git@update-htcondor || echo "::warning::gwdata install failed, continuing..."

      - name: Set up git
        run: |
          # The heredoc is fed to a login shell running as submituser.
          su - submituser <<SUBMITUSER
          set -euo pipefail
          git config --global init.defaultBranch master
          git config --global user.email "you@example.com"
          git config --global user.name "Your Name"
          SUBMITUSER

      - name: Start MockGWDataFindServer in background
        run: |
          # "-" makes python3 read the program from stdin. The extra
          # "MockGWDataFindServer" argument is not used by the program; it only
          # puts that string on the process command line so that
          # `pkill -f "MockGWDataFindServer"` can find the process later.
          # The heredoc delimiter is unquoted on purpose: $GITHUB_WORKSPACE is
          # expanded by the shell before Python sees the text.
          python3 - MockGWDataFindServer << EOF &
          from tests.mock_gwdatafind_server import MockGWDataFindServer
          import time

          frame_configs = {
              ('H', 'H1_LOSC_16_V1'): ['file://$GITHUB_WORKSPACE/tests/test_data/frames/H-H1_GWOSC_16KHZ_R1-1126259447-32.gwf'],
              ('L', 'L1_LOSC_16_V1'): ['file://$GITHUB_WORKSPACE/tests/test_data/frames/L-L1_GWOSC_16KHZ_R1-1126259447-32.gwf']
          }

          server = MockGWDataFindServer(port=8765, frame_configs=frame_configs)
          server.start()
          print("MockGWDataFindServer started on port 8765", flush=True)

          try:
              while True:
                  time.sleep(1)
          except KeyboardInterrupt:
              server.stop()
          EOF
          server_pid=$!

          # Give the server time to start
          sleep 3

          # A background failure (e.g. an import error) does not fail the step
          # on its own, so check explicitly that the process is still alive.
          if ! kill -0 "$server_pid" 2> /dev/null; then
            echo "::error::MockGWDataFindServer exited during startup"
            exit 1
          fi

      - name: Verify server is running
        run: |
          # Best effort: a failed probe is surfaced as a warning annotation only.
          curl -f http://localhost:8765/api/v1/gwf/H/H1_LOSC_16_V1/1126259460,1126259464/file.json || echo "::warning::Server check failed but continuing..."

      - name: Create asimov project
        run: |
          su - submituser <<SUBMITUSER
          set -euo pipefail
          cd $GITHUB_WORKSPACE/test_project
          asimov init "Test Project"
          SUBMITUSER

      - name: Verify Slurm scheduler was detected
        run: |
          # Inside this unquoted heredoc, "\$" defers expansion to the shell
          # that su starts; a bare "$" is expanded by the outer step shell.
          su - submituser <<SUBMITUSER
          set -euo pipefail
          cd $GITHUB_WORKSPACE/test_project

          # Check that scheduler type is set to slurm. configparser is used so
          # the check does not depend on key order; a missing section or key
          # yields an empty string and falls through to the error below.
          scheduler_type=\$(python3 -c "import configparser; cfg = configparser.ConfigParser(allow_no_value=True, interpolation=None); cfg.read('.asimov/asimov.conf'); print((cfg.get('scheduler', 'type', fallback='') or '').strip())")
          echo "Detected scheduler type: \$scheduler_type"

          if [ "\$scheduler_type" != "slurm" ]; then
            echo "ERROR: Expected scheduler type 'slurm' but got '\$scheduler_type'"
            exit 1
          fi

          echo "✓ Slurm scheduler correctly detected"
          SUBMITUSER

      - name: Configure asimov settings
        run: |
          su - submituser <<SUBMITUSER
          set -euo pipefail
          cd $GITHUB_WORKSPACE/test_project
          asimov configuration update slurm/user submituser
          asimov configuration update slurm/cron_minute '*/1'
          SUBMITUSER

      - name: Apply GWOSC quick test blueprint
        run: |
          su - submituser <<SUBMITUSER
          set -euo pipefail
          cd $GITHUB_WORKSPACE/test_project
          asimov apply -f "$GITHUB_WORKSPACE/tests/test_blueprints/gwosc_quick_test.yaml"
          SUBMITUSER

      - name: Add event from GWOSC
        run: |
          su - submituser <<SUBMITUSER
          set -euo pipefail
          cd $GITHUB_WORKSPACE/test_project
          asimov apply -f "$GITHUB_WORKSPACE/tests/test_blueprints/gwosc_event.yaml"
          SUBMITUSER

      - name: Run basic scheduler tests
        run: |
          cd $GITHUB_WORKSPACE
          python3 -m unittest tests.test_scheduler -v

      - name: Test Slurm job submission
        run: |
          # Inside this unquoted heredoc, "\$" defers expansion to the shell
          # that su starts; a bare "$" is expanded by the outer step shell.
          su - submituser <<SUBMITUSER
          set -euo pipefail
          cd $GITHUB_WORKSPACE/test_project

          # Test basic Slurm functionality
          echo "Testing basic Slurm job submission..."

          # Create a simple test job
          cat > test_job.sh << 'TESTJOB'
          #!/bin/bash
          #SBATCH --job-name=test-asimov
          #SBATCH --output=test_output.log
          #SBATCH --error=test_error.log
          #SBATCH --time=00:01:00

          echo "Slurm job running successfully"
          sleep 2
          echo "Job completed"
          TESTJOB

          # Submit the job
          job_id=\$(sbatch --parsable test_job.sh)
          echo "Submitted test job: \$job_id"

          # Wait for job to complete: poll every 2 seconds, up to 30 times.
          # An empty squeue answer or a squeue error is treated as finished.
          finished=no
          for i in {1..30}; do
            state=\$(squeue -j \$job_id -h -o "%t" 2>/dev/null || echo "DONE")
            echo "Job state: \$state"

            if [ "\$state" == "DONE" ] || [ -z "\$state" ]; then
              echo "Job completed"
              finished=yes
              break
            fi

            sleep 2
          done

          # Fail instead of reporting success when the job never left the queue.
          if [ "\$finished" != "yes" ]; then
            echo "ERROR: test job \$job_id is still in state '\$state' after waiting"
            exit 1
          fi

          # Check output: the job must have produced its log with the final line.
          if [ ! -f test_output.log ]; then
            echo "ERROR: test_output.log was not created by the test job"
            exit 1
          fi

          echo "Test job output:"
          cat test_output.log

          if ! grep -q "Job completed" test_output.log; then
            echo "ERROR: test job output does not contain the expected final line"
            exit 1
          fi

          echo "✓ Basic Slurm job submission works"
          SUBMITUSER

      - name: Show Slurm configuration
        run: |
          echo "=== Slurm Configuration ==="
          sinfo -o "%P %a %l %D %N"
          echo ""
          echo "=== Slurm Queue ==="
          squeue
          echo ""
          echo "=== Slurm Version ==="
          sinfo --version

      - name: Archive testing artifacts
        # Runs even when an earlier step failed so the project can be inspected.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: slurm-test-artifacts
          path: |
            test_project/
          retention-days: 5

      - name: Stop background server
        if: always()
        run: |
          # Kill the background Python process running the server.
          # pkill -f matches against the full command line, so this only finds
          # the server if that string appears in the arguments it was launched with.
          pkill -f "MockGWDataFindServer" || true