|
| 1 | +#!/bin/bash -e |
| 2 | +# |
| 3 | +# Ensure Fluxion marks all ranks down even if some ranks are excluded |
| 4 | +# |
| 5 | + |
| 6 | +log() { printf "issue#1182: %s\n" "$*" >&2; }  # tagged message to stderr; the text is passed as data, never as the printf format |
| 7 | + |
| 8 | +# Need a few ranks for this test, so start a new instance of size=4 |
| 9 | +if test "$ISSUE_1182_ACTIVE" != "t"; then |
| 10 | + export ISSUE_1182_ACTIVE=t  # guard: the re-executed copy sees this and skips the block |
| 11 | + log "Re-launching test script under flux-start" |
| 12 | + exec flux start -s 4 "$0"  # quoted so a script path with spaces or glob characters survives |
| 13 | +fi |
| 14 | + |
| 15 | +cat <<'EOF' >rcheck.py  # write a helper that compares the down sets from resource.sched-status and sched.resource-status; it exits 1 if they differ |
| 16 | +import sys |
| 17 | +import flux |
| 18 | +from flux.resource.list import ResourceListRPC |
| 19 | +
|
| 20 | +h = flux.Flux() |
| 21 | +
|
| 22 | +rpc1 = ResourceListRPC(h, "resource.sched-status", nodeid=0) |
| 23 | +rpc2 = ResourceListRPC(h, "sched.resource-status", nodeid=0) |
| 24 | +
|
| 25 | +rset = rpc1.get() |
| 26 | +fluxion = rpc2.get() |
| 27 | +
|
| 28 | +def symmetric_diff(a, b): |
| 29 | + return (a|b) - (a&b) |
| 30 | +
|
| 31 | +diff = symmetric_diff(rset.down, fluxion.down) |
| 32 | +if diff.ranks: |
| 33 | + print("difference detected between fluxion and core down ranks:") |
| 34 | + print(f"hosts: {diff.nodelist}") |
| 35 | + print(f"ranks: {diff.ranks}") |
| 36 | + sys.exit(1) |
| 37 | +sys.exit(0) |
| 38 | +EOF |
| 39 | + |
| 40 | +log "Unloading modules..." |
| 41 | +flux module remove sched-simple  # scheduler is removed first, then the resource module |
| 42 | +flux module remove resource |
| 43 | + |
| 44 | +# Exclude ranks 0 and 2 |
| 45 | +flux config load <<EOF |
| 46 | +[resource] |
| 47 | +exclude = "0,2" |
| 48 | +EOF |
| 49 | + |
| 50 | +flux module load resource monitor-force-up  # resource is reloaded only after the new [resource] config has been loaded |
| 51 | + |
| 52 | +# Drain rank 3. With ranks 0 and 2 excluded in this size=4 instance, the scheduler should only see rank 1 as up |
| 53 | +log "draining rank 3" |
| 54 | +flux resource drain 3 |
| 55 | + |
| 56 | +flux resource status  # printed for the test log only; the output is not checked |
| 57 | + |
| 58 | +flux module load sched-fluxion-resource  # Fluxion is loaded only after the exclusion and the drain are in place |
| 59 | +flux module load sched-fluxion-qmanager |
| 60 | + |
| 61 | +log "comparing fluxion down ranks with flux-core resource module:" |
| 62 | +flux resource list |
| 63 | +FLUX_RESOURCE_LIST_RPC=sched.resource-status flux resource list  # presumably the same listing, served by the sched.resource-status RPC instead -- confirm |
| 64 | +flux python ./rcheck.py  # exits 1 when the two down sets differ |
| 65 | + |
| 66 | +log "reloading sched-simple..." |
| 67 | +flux module remove sched-fluxion-qmanager  # Fluxion modules are removed in the reverse of their load order |
| 68 | +flux module remove sched-fluxion-resource |
| 69 | +flux module load sched-simple  # put back the scheduler that was removed at the start of the script |
0 commit comments