Skip to content

Commit ef20468

Browse files
Merge pull request #302 from openshift-cherrypick-robot/cherry-pick-298-to-18.0-fr1
[18.0-fr1] Improve tracking and error reporting of startup probe
2 parents f248f6c + f63c86a commit ef20468

File tree

5 files changed

+205
-16
lines changed

5 files changed

+205
-16
lines changed

pkg/mariadb/const.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,7 @@ const (
66

77
// ActivePodSelectorKey - Selector key used to configure A/P service behavior
88
ActivePodSelectorKey = "statefulset.kubernetes.io/pod-name"
9+
10+
// Time allowed for the startup probe to complete (in seconds)
11+
StartupProbeTimeout = 240
912
)

pkg/mariadb/statefulset.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package mariadb
22

33
import (
4+
"strconv"
5+
46
common "github.com/openstack-k8s-operators/lib-common/modules/common"
57
"github.com/openstack-k8s-operators/lib-common/modules/common/affinity"
68
mariadbv1 "github.com/openstack-k8s-operators/mariadb-operator/api/v1beta1"
@@ -112,6 +114,7 @@ func getGaleraInitContainers(g *mariadbv1.Galera) []corev1.Container {
112114
}
113115

114116
func getGaleraContainers(g *mariadbv1.Galera, configHash string) []corev1.Container {
117+
timeout := strconv.Itoa(StartupProbeTimeout)
115118
containers := []corev1.Container{{
116119
Image: g.Spec.ContainerImage,
117120
Name: "galera",
@@ -144,11 +147,13 @@ func getGaleraContainers(g *mariadbv1.Galera, configHash string) []corev1.Contai
144147
StartupProbe: &corev1.Probe{
145148
ProbeHandler: corev1.ProbeHandler{
146149
Exec: &corev1.ExecAction{
147-
Command: []string{"/bin/bash", "/var/lib/operator-scripts/mysql_probe.sh", "startup"},
150+
Command: []string{"/bin/bash", "/var/lib/operator-scripts/mysql_probe.sh", "startup", timeout},
148151
},
149152
},
150-
PeriodSeconds: 10,
151-
FailureThreshold: 30,
153+
// extra seconds so that the script is not preempted by k8s
154+
TimeoutSeconds: StartupProbeTimeout + 10,
155+
// the current probe implementation assumes a single failure threshold
156+
FailureThreshold: 1,
152157
},
153158
LivenessProbe: &corev1.Probe{
154159
ProbeHandler: corev1.ProbeHandler{

templates/galera/bin/mysql_probe.sh

Lines changed: 192 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,212 @@ read -s -u 3 3< /var/lib/secrets/dbpassword MYSQL_PWD || true
66
export MYSQL_PWD
77

88
PROBE_USER=root
9-
function mysql_status_check {
9+
10+
MYSQL_SOCKET=/var/lib/mysql/mysql.sock
11+
SST_IN_PROGRESS=/var/lib/mysql/sst_in_progress
12+
13+
CHECK_RETRY=10
14+
CHECK_WAIT=0.5
15+
STARTUP_WAIT=2
16+
17+
LAST_STATE=""
18+
# Record the current probe state in LAST_STATE.
# $1: human-readable description of the state
# Nothing is printed here: keeping only the latest state avoids
# repeating the same message in the probe output (and hence in the
# k8s events if the probe fails).
function log_state {
    local new_state="$1"
    # unchanged state: nothing to record
    [ "${LAST_STATE}" = "${new_state}" ] && return 0
    LAST_STATE="${new_state}"
}
26+
27+
# Print the state stored in LAST_STATE on stdout, if there is one.
function log_last_state {
    # stay silent when no state was ever recorded
    [ -z "${LAST_STATE}" ] || echo "${LAST_STATE}"
}
32+
trap log_last_state EXIT
33+
34+
# Query a mysql status variable, retrying up to CHECK_RETRY times.
# $1: name of the status variable (used in a "show status like" query)
# On success, prints the last line of the mysql output on stdout and
# returns 0. If every attempt fails, prints the output of the last
# attempt on stderr and returns 1.
function get_mysql_status {
    local variable=$1
    local attempt
    local result
    for attempt in $(seq $CHECK_RETRY); do
        if result=$(mysql -u${PROBE_USER} -sNEe "show status like '${variable}';" 2>&1); then
            echo "${result}" | tail -1
            return 0
        fi
        # mysql not reachable yet, wait a bit before retrying
        sleep ${CHECK_WAIT}
    done
    # all attempts failed: report the last error from mysql
    echo "${result}" >&2
    return 1
}
51+
52+
function check_mysql_status {
1053
local status=$1
1154
local expect=$2
12-
set -x
13-
mysql -u${PROBE_USER} -sNEe "show status like '${status}';" | tail -1 | grep -w -e "${expect}"
55+
local val
56+
local rc
57+
58+
val=$(get_mysql_status "${status}")
59+
test "${val}" = "${expect}"
60+
rc=$?
61+
if [ $rc -ne 0 ]; then
62+
log_state "${status} (${val}) differs from ${expect}"
63+
fi
64+
return $rc
1465
}
1566

16-
# Consider the pod has "started" once mysql is reachable
17-
# and is part of the primary partition
18-
if [ "$1" = "startup" ]; then
19-
mysql_status_check wsrep_cluster_status Primary
20-
exit $?
21-
fi
67+
# Tell whether an SST is currently running.
# Returns 0 if the SST marker file exists while the mysql socket does
# not; returns 1 as soon as the mysql socket exists, or when neither
# file showed up after CHECK_RETRY attempts.
function check_sst_in_progress {
    local attempt
    # poll a few times to give mysql some time to set up the SST
    for attempt in $(seq $CHECK_RETRY); do
        # the socket is checked first: once it exists, no SST is reported
        [ -e ${MYSQL_SOCKET} ] && return 1
        [ -e ${SST_IN_PROGRESS} ] && return 0
        sleep ${CHECK_WAIT}
    done
    return 1
}
2281

82+
# Check that mysql has created its socket and answers a ping.
# Retries up to CHECK_RETRY times, sleeping CHECK_WAIT between attempts.
# Returns 0 as soon as the socket exists and "mysqladmin ping" succeeds,
# 1 if that never happened within the retries.
function check_mysql_ready {
    local i
    # retry to give some time to mysql to create its socket
    for i in $(seq $CHECK_RETRY); do
        # discard the ping output; only the exit status matters
        if [ -e ${MYSQL_SOCKET} ] && mysqladmin -s -u${PROBE_USER} ping >/dev/null; then
            return 0
        fi
        sleep ${CHECK_WAIT}
    done
    return 1
}
94+
95+
# Monitor the startup sequence until the galera node is connected
96+
# to a primary component and synced
97+
# NOTE: as of mariadb 10.5, if mysql connects to a non-primary
98+
# partition, it never creates any socket and gets stuck indefinitely.
99+
# In that case, in order to not wait until the startup times out
100+
# (very long), we error out of the probe so that the pod can restart
101+
# and mysql reconnect to a primary partition if possible.
102+
# Run one pass of the startup checks for the galera node.
# Returns 0 once the node is considered started (bootstrap node
# reachable from the CLI, or joiner node Primary and Synced).
# Returns 1 when startup is still in progress; the reason is recorded
# with log_state.
# Exits the whole probe with status 1 when the situation is considered
# unrecoverable, so that the pod can be restarted.
function check_mysql_startup {
    # mysql initialization sequence:
    #  . mysql connects to a remote galera node over port 4567
    #  . mysql optionally runs a SST (port 4444), SST marker created on disk
    #  . only at this point, InnoDB is initialized, mysql pidfile and
    #    mysql socket are created on disk

    if pgrep -f detect_gcomm_and_start.sh >/dev/null ; then
        log_state "waiting for gcomm URI"
        return 1
    fi
    # pidfile is not written on disk until mysql is ready,
    # so look for the mysqld process instead
    if ! pgrep -f /usr/libexec/mysqld >/dev/null ; then
        log_state "waiting for mysql to start"
        return 1
    fi

    # a bootstrap node must be reachable from the CLI to finish startup
    if pgrep -f -- '--wsrep-cluster-address=gcomm://(\W|$)' >/dev/null; then
        check_mysql_ready
        return $?
    # a joiner node must have an established socket connection before testing further
    elif pgrep -f -- '--wsrep-cluster-address=gcomm://\w' >/dev/null; then
        local connections
        connections=$(ss -tnH state established src :4567 or dst :4567 | wc -l)
        # at least one established connection on the galera port is required
        if ! test "${connections}" -gt 0; then
            log_state "waiting for mysql to join a galera cluster"
            return 1
        fi
    else
        log_state "could not determine galera startup mode"
        exit 1
    fi

    # a joiner node requires additional startup checks
    if [ -e ${MYSQL_SOCKET} ]; then
        # good case, mysql is ready to be probed from the CLI
        # check WSREP status like the regular liveness probe
        local status
        local comment
        status=$(get_mysql_status wsrep_cluster_status)
        comment=$(get_mysql_status wsrep_local_state_comment)
        if [ "${status}" = "Primary" ] && [ "${comment}" = "Synced" ]; then
            return 0
        elif [ "${status}" = "Primary" ]; then
            log_state "waiting to be synced with the cluster"
            return 1
        elif [ "${status}" = "Non-primary" ] && [ "${comment}" = "Synced" ]; then
            log_state "mysql is connected to a non-primary partition, server stopped"
            exit 1
        else
            log_state "waiting for connection to a primary partition"
            return 1
        fi
    else
        # if there is no socket, mysql may be running an SST...
        if check_sst_in_progress; then
            log_state "waiting for SST to finish"
            return 1
        fi

        # ... if no SST was detected, it may have finished before
        # we probed it. Check a last time whether we can connect to mysql
        if check_mysql_ready; then
            return 0
        fi

        # At this stage, mysql is either trying to connect to a bootstrap node
        # that resolved to an old pod IP, or it is connected to a
        # non-primary partition. Either way, this is not recoverable, so
        # make the probe fail and let k8s kill the mysql server.

        log_state "could not find a primary partition to connect to"
        exit 1
    fi
    # safety net: every branch above already returns or exits
    return 1
}
180+
181+
182+
# startup probe loops until the node started or joined a galera cluster
23183
# readiness and liveness probes are run by k8s only after start probe succeeded
24184

25185
case "$1" in
186+
startup)
187+
if [ -z "$2" ]; then
188+
echo "startup timeout option missing"
189+
exit 1
190+
fi
191+
TIME_TIMEOUT=$2
192+
193+
# Run the entire check in a single startup probe to avoid spurious
194+
# "Unhealthy" k8s events to be logged. The probe stops in error
195+
# if the startup timeout is reached
196+
rc=1
197+
while [ $rc -ne 0 ]; do
198+
if check_mysql_startup; then
199+
exit 0
200+
else
201+
sleep ${STARTUP_WAIT};
202+
[ $SECONDS -ge $TIME_TIMEOUT ] && exit 1
203+
fi
204+
done
205+
exit $rc
206+
;;
26207
readiness)
27208
# If the node is e.g. a donor, it cannot serve traffic
28-
mysql_status_check wsrep_local_state_comment Synced
209+
check_mysql_status wsrep_local_state_comment Synced
29210
;;
30211
liveness)
31212
# If the node is not in the primary partition, the failed liveness probe
32213
# will make k8s restart this pod
33-
mysql_status_check wsrep_cluster_status Primary
214+
check_mysql_status wsrep_cluster_status Primary
34215
;;
35216
*)
36217
echo "Invalid probe option '$1'"

templates/galera/config/galera.cnf.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ wsrep_debug = 0
4747
wsrep_drupal_282555_workaround = 0
4848
wsrep_on = ON
4949
wsrep_provider = /usr/lib64/galera/libgalera_smm.so
50-
wsrep_provider_options = gmcast.listen_addr=tcp://{ PODIP }:4567
50+
wsrep_provider_options = pc.wait_prim=FALSE;gcache.recover=no;gmcast.listen_addr=tcp://{ PODIP }:4567
5151
wsrep_retry_autocommit = 1
5252
wsrep_slave_threads = 1
5353
wsrep_sst_method = rsync

templates/galera/config/galera_tls.cnf.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ssl-cert = /etc/pki/tls/certs/galera.crt
44
ssl-key = /etc/pki/tls/private/galera.key
55
ssl-ca = /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem
66
ssl-cipher = !SSLv2:kEECDH:kRSA:kEDH:kPSK:+3DES:!aNULL:!eNULL:!MD5:!EXP:!RC4:!SEED:!IDEA:!DES:!SSLv3:!TLSv1
7-
wsrep_provider_options = gcache.recover=no;gmcast.listen_addr=tcp://{ PODIP }:4567;socket.ssl_key=/etc/pki/tls/private/galera.key;socket.ssl_cert=/etc/pki/tls/certs/galera.crt;socket.ssl_cipher={ SSL_CIPHER };socket.ssl_ca=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem;
7+
wsrep_provider_options = pc.wait_prim=FALSE;gcache.recover=no;gmcast.listen_addr=tcp://{ PODIP }:4567;socket.ssl_key=/etc/pki/tls/private/galera.key;socket.ssl_cert=/etc/pki/tls/certs/galera.crt;socket.ssl_cipher={ SSL_CIPHER };socket.ssl_ca=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem;
88

99
[sst]
1010
sockopt = cipher=!SSLv2:kEECDH:kRSA:kEDH:kPSK:+3DES:!aNULL:!eNULL:!MD5:!EXP:!RC4:!SEED:!IDEA:!DES:!SSLv3:!TLSv1

0 commit comments

Comments
 (0)