Skip to content

Commit ded9b41

Browse files
committed
mgr/cephadm: Improving error handling and logging for mgmt-gw test
Signed-off-by: Redouane Kachach <[email protected]>
1 parent 0885546 commit ded9b41

File tree

1 file changed

+33
-9
lines changed

1 file changed

+33
-9
lines changed

qa/suites/orch/cephadm/workunits/task/test_mgmt_gateway.yaml

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ tasks:
4848
enable_health_check_endpoint: True
4949
EOT
5050
# Add generated certificates to spec file
51-
echo " ssl_cert: |" >> /tmp/mgmt.spec
51+
echo " ssl_cert: |" >> /tmp/mgmt.spec
5252
while read LINE; do echo $LINE | sed -e "s/^/ /"; done < /tmp/cert.pem >> /tmp/mgmt.spec
5353
echo " ssl_key: |" >> /tmp/mgmt.spec
5454
while read LINE; do echo $LINE | sed -e "s/^/ /"; done < /tmp/key.pem >> /tmp/mgmt.spec
@@ -60,18 +60,42 @@ tasks:
6060
host.a:
6161
- |
6262
set -ex
63+
64+
# Function to wait for a service to be healthy and log response on error
65+
# Poll an HTTP endpoint until its JSON response satisfies a jq filter, and
# log the last response if it never does.
# Arguments:
#   $1 - service name, used only in log messages
#   $2 - URL to query (fetched with curl -k -s -u admin:admin)
#   $3 - jq filter; the service counts as healthy when `jq -e` exits 0 on it
#   $4 - optional number of attempts (default: 30)
#   $5 - optional delay in seconds between attempts (default: 10)
# Outputs: progress messages on stdout; timeout diagnostics on stderr
# Returns: 0 as soon as the filter matches, 1 if every attempt fails
wait_for_service() {
  local name="$1"
  local url="$2"
  local jq_filter="$3"
  local max_attempts="${4:-30}"
  local delay="${5:-10}"
  local response=""
  local i

  echo "Waiting for service $name to be healthy at $url..."
  for ((i = 1; i <= max_attempts; i++)); do
    # --max-time keeps a hung connection from blocking the loop forever, and
    # `|| true` makes a curl failure count as "not ready yet" instead of
    # aborting the caller when `set -e` is in effect. The assignment still
    # keeps whatever curl printed before failing.
    response=$(curl -k -s --max-time 30 -u admin:admin "$url") || true
    # jq errors are silenced here; the filter is re-run verbosely on timeout.
    if jq -e "$jq_filter" <<<"$response" > /dev/null 2>&1; then
      echo "Service $name is healthy."
      return 0
    fi
    echo "Attempt $i: service $name not ready yet"
    # No point sleeping after the final attempt.
    if ((i < max_attempts)); then
      sleep "$delay"
    fi
  done

  {
    echo "Timeout waiting for $name at $url"
    echo "Last HTTP response:"
    printf '%s\n' "$response"
    echo "jq output:"
    jq "$jq_filter" <<<"$response" || echo "(jq parse error or no match)"
  } >&2
  return 1
}
89+
6390
# retrieve mgmt hostname and ip
6491
MGMT_GTW_HOST=$(ceph orch ps --daemon-type mgmt-gateway -f json | jq -e '.[]' | jq -r '.hostname')
6592
MGMT_GTW_IP=$(ceph orch host ls -f json | jq -r --arg MGMT_GTW_HOST "$MGMT_GTW_HOST" '.[] | select(.hostname==$MGMT_GTW_HOST) | .addr')
93+
6694
# check mgmt-gateway health
6795
curl -k -s https://${MGMT_GTW_IP}/health
6896
curl -k -s https://${MGMT_GTW_IP}:29443/health
69-
# wait for background services to be reconfigured following mgmt-gateway installation
70-
sleep 180
71-
# check grafana endpoints are responsive and database health is okay
72-
curl -k -s https://${MGMT_GTW_IP}/grafana/api/health | jq -e '.database == "ok"'
73-
# check prometheus endpoints are responsive
74-
curl -k -s -u admin:admin https://${MGMT_GTW_IP}/prometheus/api/v1/status/config | jq -e '.status == "success"'
75-
# check alertmanager endpoints are responsive
76-
curl -k -s -u admin:admin https://${MGMT_GTW_IP}/alertmanager/api/v2/status
7797
98+
# wait for monitoring services
99+
wait_for_service "Grafana" "https://${MGMT_GTW_IP}/grafana/api/health" '.database == "ok"' || exit 1
100+
wait_for_service "Prometheus" "https://${MGMT_GTW_IP}/prometheus/api/v1/status/config" '.status == "success"' || exit 1
101+
wait_for_service "Alertmanager" "https://${MGMT_GTW_IP}/alertmanager/api/v2/status" '.cluster.status == "ready"' || exit 1

0 commit comments

Comments
 (0)