#!/bin/sh
#
# Exercise the "flub" broker bootstrap method: a broker started with
# -Sbroker.boot-server=URI asks an already running instance for a rank.
# The instances used here are batch jobs whose size attribute is set
# larger than the number of nodes they were started on, leaving "extra"
# ranks for late-joining brokers to fill.
#

test_description='Test flub bootstrap method'

. "$(dirname "$0")/sharness.sh"

# Arguments are passed through to the sharness helper; presumably this
# re-runs the script under an 8-broker test instance -- see sharness.sh.
test_under_flux 8 full

# Point FLUX_SSH at the tssh script shipped with the test suite.
export FLUX_SSH="${SHARNESS_TEST_SRCDIR}/scripts/tssh"

#
# Error paths: an unreachable boot server, and a server instance that
# has no unfilled ranks to hand out.
#

test_expect_success 'broker fails with bad broker.boot-server' '
	test_must_fail flux broker \
	    -Sbroker.rc1_path= -Sbroker.rc3_path= \
	    -Sbroker.boot-server=local://noexist/path \
	    /bin/true 2>server.err &&
	grep "was not found" server.err
'

test_expect_success 'start a one node job with no extra ranks' '
	id=$(flux batch -N1 --wrap sleep inf) &&
	flux job wait-event -t10 $id memo &&
	flux uri $id >test1.uri
'
test_expect_success 'job has size 1' '
	size=$(flux proxy $(cat test1.uri) flux getattr size) &&
	test $size -eq 1
'
# Both conditions must hold: the broker has to fail AND report the
# expected reason.  The && after the redirection keeps the
# test_must_fail status in the chain instead of discarding it.
test_expect_success 'flub bootstrap fails with no available ranks' '
	test_must_fail flux broker \
	    -Sbroker.boot-server=$(cat test1.uri) 2>noranks.err &&
	grep "no available ranks" noranks.err
'
test_expect_success 'clean up' '
	flux cancel --all
'

#
# Start 2 node batch job with one extra slot.
# Submit 1 node broker job that fills the slot.
# Run a parallel job across all three nodes in the batch job.
#

# The heredoc body is tab-indented so that <<- strips the indentation.
test_expect_success 'create config with fake resources' '
	cat >fake2.toml <<-EOT
	[resource]
	noverify = true
	[[resource.config]]
	hosts = "a,b,c"
	cores = "0-3"
	EOT
'
test_expect_success 'start a two node job with one extra rank' '
	id=$(flux batch -N2 \
	    --broker-opts=--config-path=fake2.toml \
	    --broker-opts=-Ssize=3 \
	    --broker-opts=-Sbroker.quorum=2 \
	    --broker-opts=-Stbon.topo=kary:0 \
	    --wrap sleep inf) &&
	flux job wait-event -t10 $id memo &&
	flux uri $id >test2.uri
'
test_expect_success 'job has size 3' '
	size=$(flux proxy $(cat test2.uri) flux getattr size) &&
	test $size -eq 3
'
test_expect_success 'overlay status shows extra node offline' '
	flux proxy $(cat test2.uri) \
	    flux overlay status --wait=partial --no-pretty >ov2.out &&
	grep "2 extra0: offline" ov2.out
'
test_expect_success 'submit a job that starts the extra broker' '
	flux submit -N1 flux broker \
	    --config-path=fake2.toml \
	    -Stbon.topo=kary:0 \
	    -Sbroker.boot-server=$(cat test2.uri)
'
test_expect_success 'overlay status shows extra rank connected' '
	run_timeout 20 flux proxy $(cat test2.uri) \
	    flux overlay status --wait=full --no-pretty >ov2b.out &&
	grep "2 extra0: full" ov2b.out
'
test_expect_success 'run a 3 node job in the expanded instance' '
	run_timeout 30 flux proxy $(cat test2.uri) \
	    flux run --label-io -N3 flux pmi barrier
'
test_expect_success 'clean up' '
	flux cancel --all
'

#
# Start 3 node batch job with four extra slots (kary:2).
# Submit 4 node broker job that fills the slots.
# Run a parallel job across all seven nodes in the batch job.
#

# The heredoc body is tab-indented so that <<- strips the indentation.
test_expect_success 'create config with fake resources' '
	cat >fake3.toml <<-EOT
	[resource]
	noverify = true
	[[resource.config]]
	hosts = "a,b,c,d,e,f,g"
	cores = "0-3"
	EOT
'
test_expect_success 'start a three node job with four extra ranks' '
	id=$(flux batch -N3 \
	    --broker-opts=--config-path=fake3.toml \
	    --broker-opts=-Ssize=7 \
	    --broker-opts=-Sbroker.quorum=3 \
	    --broker-opts=-Stbon.topo=kary:2 \
	    --wrap sleep inf) &&
	flux job wait-event -t10 $id memo &&
	flux uri $id >test3.uri
'
test_expect_success 'job has size 7' '
	size=$(flux proxy $(cat test3.uri) flux getattr size) &&
	test $size -eq 7
'
# Informational only: the output is not checked, just the exit status.
test_expect_success 'show overlay status' '
	flux proxy $(cat test3.uri) \
	    flux overlay status
'
test_expect_success 'submit a job that starts extra brokers' '
	flux submit -N4 flux broker \
	    --config-path=fake3.toml \
	    -Stbon.topo=kary:2 \
	    -Sbroker.boot-server=$(cat test3.uri)
'
test_expect_success 'overlay status shows extra ranks connected' '
	run_timeout 20 flux proxy $(cat test3.uri) \
	    flux overlay status --wait=full
'
test_expect_success 'run a 7 node job in the expanded instance' '
	run_timeout 30 flux proxy $(cat test3.uri) \
	    flux run --label-io -N7 flux pmi barrier
'
test_expect_success 'clean up' '
	flux cancel --all
'


test_done