Skip to content

Commit 17cc323

Browse files
committed
mgr/dashboard: fix indefinite loop in cephadm dashboard e2e
The tests seem to wait to fetch the Prometheus details in case cephadm ran into an error, and they just wait there for more than an hour without any progress. This commit fixes that and makes some minor improvements. An example log: https://jenkins.ceph.com/job/ceph-dashboard-cephadm-e2e/12287/consoleFull Signed-off-by: Nizamudeen A <[email protected]>
1 parent 3657866 commit 17cc323

File tree

3 files changed

+36
-20
lines changed

3 files changed

+36
-20
lines changed

src/pybind/mgr/dashboard/ci/cephadm/bootstrap-cluster.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ cephadm_shell="$CEPHADM shell --fsid ${fsid} -c /etc/ceph/ceph.conf -k /etc/ceph
3232
{% for number in range(1, nodes) %}
3333
ssh-copy-id -f -i /etc/ceph/ceph.pub -o StrictHostKeyChecking=no root@192.168.100.10{{ number }}
3434
{% if expanded_cluster is defined %}
35-
${cephadm_shell} ceph orch host add {{ prefix }}-node-0{{ number }}
35+
${cephadm_shell} ceph orch host add {{ prefix }}-node-0{{ number }} 192.168.100.10{{ number }}
3636
{% endif %}
3737
{% endfor %}
3838

src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,22 +38,4 @@ cypress_run () {
3838

3939
cd ${CEPH_DEV_FOLDER}/src/pybind/mgr/dashboard/frontend
4040

41-
kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"'
42-
43-
# check if the prometheus daemon is running
44-
# before starting the e2e tests
45-
46-
PROMETHEUS_RUNNING_COUNT=$(kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch ls --service_name=prometheus --format=json"' | jq -r '.[] | .status.running')
47-
while [[ $PROMETHEUS_RUNNING_COUNT -lt 1 ]]; do
48-
PROMETHEUS_RUNNING_COUNT=$(kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch ls --service_name=prometheus --format=json"' | jq -r '.[] | .status.running')
49-
done
50-
51-
# grafana ip address is set to the fqdn by default.
52-
# kcli is not working with that, so setting the IP manually.
53-
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-alertmanager-api-host http://192.168.100.100:9093"'
54-
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-prometheus-api-host http://192.168.100.100:9095"'
55-
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-grafana-api-url https://192.168.100.100:3000"'
56-
kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch apply node-exporter --placement 'count:2'"'
57-
58-
cypress_run ["cypress/e2e/orchestrator/workflow/*.feature","cypress/e2e/orchestrator/workflow/*-spec.ts"]
59-
cypress_run "cypress/e2e/orchestrator/grafana/*.feature"
41+
cypress_run ["cypress/e2e/orchestrator/workflow/*.feature","cypress/e2e/orchestrator/workflow/*-spec.ts","cypress/e2e/orchestrator/grafana/*.feature"]

src/pybind/mgr/dashboard/ci/cephadm/start-cluster.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,37 @@ while [[ -z $(kcli ssh -u root -- ceph-node-00 'journalctl --no-tail --no-pager
8282
fi
8383
kcli ssh -u root -- ceph-node-00 'journalctl -n 100 --no-pager -t cloud-init'
8484
done
85+
86+
kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"'
87+
88+
get_prometheus_running_count() {
89+
echo $(kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch ls --service_name=prometheus --format=json"' | jq -r '.[] | .status.running')
90+
}
91+
92+
# check if the prometheus daemon is running on jenkins node
93+
# before starting the e2e tests
94+
if [[ -n "${JENKINS_HOME}" ]]; then
95+
retry=0
96+
PROMETHEUS_RUNNING_COUNT=$(get_prometheus_running_count)
97+
# retrying for 10 times to see if we can get the prometheus count
98+
# otherwise this would run indefinitely and bloat up the machine
99+
while [[ $retry -lt 10 && $PROMETHEUS_RUNNING_COUNT -lt 1 ]]; do
100+
if [[ ${retry} -gt 0 ]]; then
101+
echo "Retry attempt to get the prometheus count..." ${retry}
102+
fi
103+
PROMETHEUS_RUNNING_COUNT=$(get_prometheus_running_count)
104+
retry=$((retry +1))
105+
sleep 10
106+
done
107+
108+
if [[ ${retry} -ge 10 ]]; then
109+
exit 1
110+
fi
111+
112+
# grafana ip address is set to the fqdn by default.
113+
# kcli is not working with that, so setting the IP manually.
114+
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-alertmanager-api-host http://192.168.100.100:9093"'
115+
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-prometheus-api-host http://192.168.100.100:9095"'
116+
kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-grafana-api-url https://192.168.100.100:3000"'
117+
kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch apply node-exporter --placement 'count:2'"'
118+
fi

0 commit comments

Comments
 (0)