|
16 | 16 | _number_of_computes: 0 |
17 | 17 | _retries: 25 |
18 | 18 | _cell_conductor: null |
| 19 | + # Retry settings for oc commands to handle transient auth failures |
| 20 | + _oc_retries: 5 |
| 21 | + _oc_delay: 30 |
19 | 22 | environment: |
20 | 23 | KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" |
21 | 24 | PATH: "{{ cifmw_path }}" |
|
29 | 32 | COMPUTES={{ _number_of_computes }} |
30 | 33 | RETRIES={{ _retries }} |
31 | 34 | COUNTER=0 |
32 | | - oc project {{ namespace }} |
| 35 | + OC_RETRIES={{ _oc_retries }} |
| 36 | + OC_DELAY={{ _oc_delay }} |
| 37 | +
|
| 38 | + # Retry oc project command to handle transient auth failures |
| 39 | + oc_retry_counter=0 |
| 40 | + until oc project {{ namespace }}; do |
| 41 | + if [[ "$oc_retry_counter" -ge "$OC_RETRIES" ]]; then |
| 42 | + echo "Failed to authenticate with OpenShift after $OC_RETRIES attempts" |
| 43 | + exit 1 |
| 44 | + fi |
| 45 | + oc_retry_counter=$[$oc_retry_counter +1] |
| 46 | + echo "OpenShift auth failed, retrying in ${OC_DELAY}s (attempt $oc_retry_counter/$OC_RETRIES)" |
| 47 | + sleep $OC_DELAY |
| 48 | + done |
| 49 | +
|
33 | 50 | until [ $(oc rsh openstackclient openstack compute service list --service nova-compute -f value | wc -l) -eq "$COMPUTES" ]; do |
34 | 51 | if [[ "$COUNTER" -ge "$RETRIES" ]]; then |
35 | 52 | exit 1 |
36 | 53 | fi |
37 | 54 | COUNTER=$[$COUNTER +1] |
38 | 55 | sleep 10 |
39 | 56 | done |
| 57 | +
|
40 | 58 | - name: Run nova-manage discover_hosts and wait for host records |
41 | 59 | cifmw.general.ci_script: |
42 | 60 | output_dir: "{{ cifmw_basedir }}/artifacts" |
|
46 | 64 | COMPUTES={{ _number_of_computes | int + 4 }} |
47 | 65 | RETRIES={{ _retries }} |
48 | 66 | COUNTER=0 |
49 | | - oc project {{ namespace }} |
| 67 | + OC_RETRIES={{ _oc_retries }} |
| 68 | + OC_DELAY={{ _oc_delay }} |
| 69 | +
|
| 70 | + # Retry oc project command to handle transient auth failures |
| 71 | + oc_retry_counter=0 |
| 72 | + until oc project {{ namespace }}; do |
| 73 | + if [[ "$oc_retry_counter" -ge "$OC_RETRIES" ]]; then |
| 74 | + echo "Failed to authenticate with OpenShift after $OC_RETRIES attempts" |
| 75 | + exit 1 |
| 76 | + fi |
| 77 | + oc_retry_counter=$[$oc_retry_counter +1] |
| 78 | + echo "OpenShift auth failed, retrying in ${OC_DELAY}s (attempt $oc_retry_counter/$OC_RETRIES)" |
| 79 | + sleep $OC_DELAY |
| 80 | + done |
| 81 | +
|
50 | 82 | until [ $(oc rsh {{ _cell_conductor }} nova-manage cell_v2 list_hosts | wc -l) -eq "$COMPUTES" ]; do |
51 | 83 | if [[ "$COUNTER" -ge "$RETRIES" ]]; then |
52 | 84 | exit 1 |
|
0 commit comments